library(readxl)
library(clusterProfiler)
clusterProfiler v4.8.2  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/

If you use clusterProfiler in published research, please cite:
T Wu, E Hu, S Xu, M Chen, P Guo, Z Dai, T Feng, L Zhou, W Tang, L Zhan, X Fu, S Liu, X Bo, and G Yu. clusterProfiler 4.0: A universal enrichment tool for interpreting omics data. The Innovation. 2021, 2(3):100141

Attaching package: ‘clusterProfiler’

The following object is masked from ‘package:biomaRt’:

    select

The following object is masked from ‘package:IRanges’:

    slice

The following object is masked from ‘package:S4Vectors’:

    rename

The following object is masked from ‘package:stats’:

    filter
library(GOplot)
Loading required package: ggplot2
Loading required package: ggdendro
Loading required package: gridExtra

Attaching package: ‘gridExtra’

The following object is masked from ‘package:Biobase’:

    combine

The following object is masked from ‘package:BiocGenerics’:

    combine

Loading required package: RColorBrewer
library(DOSE)
DOSE v3.26.1  For help: https://yulab-smu.top/biomedical-knowledge-mining-book/

If you use DOSE in published research, please cite:
Guangchuang Yu, Li-Gen Wang, Guang-Rong Yan, Qing-Yu He. DOSE: an R/Bioconductor package for Disease Ontology Semantic and Enrichment analysis. Bioinformatics 2015, 31(4):608-609
library(topGO)
Loading required package: graph
Loading required package: GO.db
Loading required package: AnnotationDbi

Attaching package: ‘AnnotationDbi’

The following object is masked from ‘package:clusterProfiler’:

    select

Loading required package: SparseM

Attaching package: ‘SparseM’

The following object is masked from ‘package:base’:

    backsolve


groupGOTerms:   GOBPTerm, GOMFTerm, GOCCTerm environments built.

Attaching package: ‘topGO’

The following object is masked from ‘package:IRanges’:

    members
library(circlize)
========================================
circlize version 0.4.15
CRAN page: https://cran.r-project.org/package=circlize
Github page: https://github.com/jokergoo/circlize
Documentation: https://jokergoo.github.io/circlize_book/book/

If you use it in published research, please cite:
Gu, Z. circlize implements and enhances circular visualization
  in R. Bioinformatics 2014.

This message can be suppressed by:
  suppressPackageStartupMessages(library(circlize))
========================================


Attaching package: ‘circlize’

The following object is masked from ‘package:graph’:

    degree
library("pheatmap")

library("ggsci")
library("ggplot2")
library("gridExtra")

library("survival")
library("survminer")
Loading required package: ggpubr

Attaching package: ‘survminer’

The following object is masked from ‘package:survival’:

    myeloma
library(ggpubr)
library(rstpm2)
library(corrplot)
corrplot 0.92 loaded
library(Hmisc)
Registered S3 method overwritten by 'htmlwidgets':
  method           from         
  print.htmlwidget tools:rstudio

Attaching package: ‘Hmisc’

The following object is masked from ‘package:AnnotationDbi’:

    contents

The following object is masked from ‘package:ggdendro’:

    label

The following object is masked from ‘package:Biobase’:

    contents

The following objects are masked from ‘package:base’:

    format.pval, units
library(cowplot)

Attaching package: ‘cowplot’

The following object is masked from ‘package:ggpubr’:

    get_legend
library(reshape)

Attaching package: ‘reshape’

The following object is masked from ‘package:cowplot’:

    stamp

The following object is masked from ‘package:clusterProfiler’:

    rename

The following objects are masked from ‘package:S4Vectors’:

    expand, rename
library(ggbreak)
ggbreak v0.1.2

If you use ggbreak in published research, please cite the following paper:

S Xu, M Chen, T Feng, L Zhan, L Zhou, G Yu. Use ggbreak to effectively utilize plotting space to deal with large datasets and outliers.
Frontiers in Genetics. 2021, 12:774846. doi: 10.3389/fgene.2021.774846 
library(ggrepel)
GiniClin <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "Gini_Clinic")

GiniClin <- as.data.frame(GiniClin, row.names = GiniClin$ID)

Figure 1

f1a <- ggplot(GiniClin, aes(x=GiniIndex)) +
  geom_histogram(colour = "black", fill="#4dbbd5ff", bins = 50) +
  theme_classic() +
  theme(axis.text = element_text(size = 20)) +
  labs(title = "Gini_distribution", y = "Patient count") +
  scale_x_continuous(expand = c(0,0), limits = c(0, 0.8)) + scale_y_continuous(expand = c(0,0), limits = c(0,25))

f1a
Warning: Removed 2 rows containing missing values (geom_bar).

qqnorm(GiniClin_tumor$GiniIndex, ylab = 'GiniIndex')
qqline(GiniClin_tumor$GiniIndex) # 增加趋势直线,利于比较

# H0 is that the data fit normal distribution
shapiro.test(GiniClin_tumor$GiniIndex)

    Shapiro-Wilk normality test

data:  GiniClin_tumor$GiniIndex
W = 0.92421, p-value = 4.017e-08
GiniClin_AcSq <- GiniClin[GiniClin$histology_Hans_B_WHO_2015 %in% c("AC","SqCC"), ]

f1a_2 <- ggplot(GiniClin_AcSq, aes(x=GiniIndex, fill = histology_Hans_B_WHO_2015)) +
  geom_histogram(color = "black", position = "identity", bins = 50, size = 0.3) +
  theme_classic() +
  theme(axis.text = element_text(size = 20, colour = "black"), legend.position = c(0.8,0.8)) +
  labs(title = "Gini_distribution", fill = "Histology") +
  scale_fill_manual(values = alpha(c("#F8766D", "#00BFC4"), 0.75)) +
  scale_x_continuous(expand = c(0,0), limits = c(0,0.85)) + scale_y_continuous(expand = c(0,0), limits = c(0,17), breaks = c(seq(0,15,5)))

f1a_2
Warning: Removed 4 rows containing missing values (geom_bar).

GiniClin_tumor <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "Gini_Clinic_tumor")

GiniClin_tumor <- as.data.frame(GiniClin_tumor, row.names = GiniClin_tumor$ID)

Box plot

# histology
f1b_3 <- ggplot(GiniClin_tumor, aes(x = histology_Hans_B_WHO_2015, y = GiniIndex, fill=histology_Hans_B_WHO_2015)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  #coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#999999", "#00BF7D","#00B0F6","#A3A500", "#00BFC4")) +
  labs(title = "Histology", x = "Histology") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_3

GiniClin_tumor[GiniClin_tumor$gender == "M" ,"gender"] <- "Male"
GiniClin_tumor[GiniClin_tumor$gender == "F" ,"gender"] <- "Female"

f1b_4 <- ggplot(GiniClin_tumor, aes(x = gender, y = GiniIndex, fill=gender)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_fill_npg() +
  labs(title = "Gender", x = "Sex") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_4

# WHO PS
GiniClin_tumor[GiniClin_tumor$WHO_PS == 0 ,"WHO_PS_group"] <- 0
GiniClin_tumor[GiniClin_tumor$WHO_PS > 0 ,"WHO_PS_group"] <- "> 0"

f1b_6 <- ggplot(GiniClin_tumor, aes(x = WHO_PS_group, y = GiniIndex, fill=WHO_PS_group)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "WHO performance status", x = "WHO performance rank") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)

f1b_6

AC/SqCC

ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ], aes(x = stageGroup, y = GiniIndex, fill=stageGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_color_npg() +
  labs(title = "Stage", x = "Stage") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ], aes(x = stageGroup, y = GiniIndex, fill=stageGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_color_npg() +
  labs(title = "Stage", x = "Stage") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ], aes(x = WHO_PS_group, y = GiniIndex, fill=WHO_PS_group)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "WHO performance status", x = "WHO performance rank") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)

ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ], aes(x = WHO_PS_group, y = GiniIndex, fill=WHO_PS_group)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "WHO performance status", x = "WHO performance rank") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)

Figure 2

library("survival")
library("survminer")

Attaching package: ‘survminer’

The following object is masked from ‘package:survival’:

    myeloma

Figure2A: KM plot

All patient

GiniClin_tumor[GiniClin_tumor$GiniIndex >= 0.25431, "giniGroup"] <- "Above median"
GiniClin_tumor[GiniClin_tumor$GiniIndex < 0.25431, "giniGroup"] <- "Under median"
GiniClin_tumor$giniGroup = factor(GiniClin_tumor$giniGroup, levels = c("Above median", "Under median"))
# construct the object
fig2test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor)

print(fig2test)
Call: survfit(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_tumor)

                        n events median 0.95LCL 0.95UCL
giniGroup=Under median 91     46   4.99    3.55      NA
giniGroup=Above median 91     40     NA    4.80      NA
summary(fig2test)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL 0.95UCL
giniGroup=Under median      91    91      91     46 3.746049 0.1583925 4.985626 3.545517      NA
giniGroup=Above median      91    91      91     40 3.680266 0.1885308       NA 4.799452      NA
fig2a_all
Warning messages:
1: In overlay(...) :
  reverting 'unnamed.chunk.label' to 'unnamed-chunk' for duration of render
2: In overlay(...) :
  reverting 'unnamed.chunk.label' to 'unnamed-chunk' for duration of render
3: In overlay(...) :
  reverting 'unnamed.chunk.label' to 'unnamed-chunk' for duration of render

fig2a_continue.cox <- coxph(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ GiniIndex, data = GiniClin_tumor)
fig2a_continue.cox
Call:
coxph(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    GiniIndex, data = GiniClin_tumor)

             coef exp(coef) se(coef)      z     p
GiniIndex -0.5778    0.5611   1.0416 -0.555 0.579

Likelihood ratio test=0.31  on 1 df, p=0.5749
n= 182, number of events= 86 
ggsurvplot(survfit(fig2a_continue.cox, data=GiniClin_tumor), color = "#2E9FDF",ggtheme = theme_minimal())
Warning: Now, to change color palette, use the argument palette= '#2E9FDF' instead of color = '#2E9FDF'

Time split surv

fig2_timespl.cox <- coxph(Surv(tstart, OS_years_5_years_trunk_20190329, event==1) ~ giniGroup_nr:strata(tgroup), data=fig2_timespl, ties="efron")
summary(fig2_timespl.cox)
Call:
coxph(formula = Surv(tstart, OS_years_5_years_trunk_20190329, 
    event == 1) ~ giniGroup_nr:strata(tgroup), data = fig2_timespl, 
    ties = "efron")

  n= 323, number of events= 85 

                                       coef exp(coef) se(coef)      z Pr(>|z|)  
giniGroup_nr:strata(tgroup)tgroup=1  0.5457    1.7258   0.3292  1.658   0.0974 .
giniGroup_nr:strata(tgroup)tgroup=2 -0.7484    0.4731   0.3150 -2.376   0.0175 *
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

                                    exp(coef) exp(-coef) lower .95 upper .95
giniGroup_nr:strata(tgroup)tgroup=1    1.7258     0.5794    0.9052    3.2901
giniGroup_nr:strata(tgroup)tgroup=2    0.4731     2.1137    0.2552    0.8772

Concordance= 0.58  (se = 0.027 )
Likelihood ratio test= 8.86  on 2 df,   p=0.01
Wald test            = 8.39  on 2 df,   p=0.02
Score (logrank) test = 8.73  on 2 df,   p=0.01
cox.zph(fig2_timespl.cox)
                            chisq df    p
giniGroup_nr:strata(tgroup)  1.22  2 0.54
GLOBAL                       1.22  2 0.54
fig2_mod_tvc <- stpm2(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329 == 1) ~ giniGroup_nr, data = GiniClin_tumor, df=3, tvc=list(giniGroup_nr=1))
Warning in gsm(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329 ==  :
  Some event times <= 0
plot(fig2_mod_tvc, newdata = data.frame(giniGroup_nr = 0), type = "hazard",  xlim=c(0.5,5), ylim=c(0,0.2), ylab = "Hazard", xlab = "Time", lwd = 2, ci = FALSE)
Warning in rug(eventTimes, col = line.col) :
  some values will be clipped
plot(fig2_mod_tvc, newdata = data.frame(giniGroup_nr = 1), type = "hazard", line.col=2, ci=FALSE, lwd = 2, add=TRUE)

Calculate best cutoff

fig2.cut <- surv_cutpoint(GiniClin_tumor,
                          time = "OS_years_5_years_trunk_20190329",
                          event = "status_5_years_trunk_20190329",
                          variables = "GiniIndex")
summary(fig2.cut)
plot(fig2.cut, "GiniIndex", palette = "npg")
$GiniIndex
Warning message:
In overlay(...) :
  reverting 'unnamed.chunk.label' to 'unnamed-chunk' for duration of render

Calculate AC and SqCC

plot(fig2.cut_AC, "GiniIndex", palette = "npg")
$GiniIndex

KM without censoring

fig2_NoCensor_test <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ giniGroup, data = GiniClin_tumor)

print(fig2_NoCensor_test)
Call: survfit(formula = Surv(OS_years_20190329, status_out_20190329) ~ 
    giniGroup, data = GiniClin_tumor)

                        n events median 0.95LCL 0.95UCL
giniGroup=Under median 91     66   5.17    3.55    7.59
giniGroup=Above median 91     53   6.66    4.80      NA
summary(fig2_NoCensor_test)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL 0.95UCL
giniGroup=Under median      91    91      91     66 6.482247 0.4825654 5.169062 3.545517 7.59206
giniGroup=Above median      91    91      91     53 7.380460 0.5523298 6.661191 4.799452      NA

fig2_NoCensor.cut <- surv_cutpoint(GiniClin_tumor,
                          time = "OS_years_20190329",
                          event = "status_out_20190329",
                          variables = "GiniIndex")
summary(fig2_NoCensor.cut)
plot(fig2_NoCensor.cut, "GiniIndex", palette = "npg")
$GiniIndex

ggsurvplot(fig2_NoCensor.cut.fit,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 2,
                        ggtheme = theme_classic(),
                        legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv_without_censoring",
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())

AC patient

GiniClin_tumor_Ac = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ]

fig2ACtest <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor_Ac)

print(fig2ACtest)
Call: survfit(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_tumor_Ac)

                        n events median 0.95LCL 0.95UCL
giniGroup=Above median 50     20     NA    4.80      NA
giniGroup=Under median 57     31   4.07    2.95      NA
print("====")
[1] "===="
summary(fig2ACtest)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL 0.95UCL
giniGroup=Above median      50    50      50     20 3.889363 0.2358844       NA 4.799452      NA
giniGroup=Under median      57    57      57     31 3.523674 0.2151269 4.071184 2.945927      NA

without sensoring

fig2AC_NoCensor_test <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ giniGroup, data = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ])

print(fig2AC_NoCensor_test)
Call: survfit(formula = Surv(OS_years_20190329, status_out_20190329) ~ 
    giniGroup, data = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == 
    "AC", ])

                        n events median 0.95LCL 0.95UCL
giniGroup=Under median 57     40   4.07    2.95    7.59
giniGroup=Above median 50     28   7.31    4.80      NA
summary(fig2AC_NoCensor_test)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL 0.95UCL
giniGroup=Under median      57    57      57     40 6.139774 0.6418922 4.071184 2.945927 7.59206
giniGroup=Above median      50    50      50     28 7.749610 0.7251560 7.305955 4.799452      NA
ggsurvplot(fig2AC_NoCensor_test,
          pval = TRUE,
          palette = c("#e64b35", "#4dbbd5"),
          break.time.by = 1,
          ggtheme = theme_classic(),
          legend.labs = c("Above median", "Under median"),
          risk.table = "abs_pct",
          risk.table.y.text = FALSE,
          xlab = "Time (years)",
          title = "giniSurv_without_censoring",
          axes.offset = FALSE,
          risk.table.fontsize = 2.5,
          legend = c(0.9,0.9),
          risk.table.title = "Number at risk by time: n (%)",
          font.main = c())

SqCC patient

GiniClin_tumor_SqCC = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ]

fig2SqCCtest_2 <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor_SqCC)

print(fig2SqCCtest)
Call: survfit(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_SqCC)

                        n events median 0.95LCL 0.95UCL
giniGroup=Above median 41     21   4.86    3.20      NA
giniGroup=Under median 32     12     NA    4.39      NA
print("====")
[1] "===="
summary(fig2SqCCtest)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL 0.95UCL
giniGroup=Above median      41    41      41     21 3.366955 0.2968385 4.856947 3.195072      NA
giniGroup=Under median      32    32      32     12 4.213723 0.2114153       NA 4.388775      NA

fig2_SqCC_timespl <- survSplit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329 == 1) ~ ., data=GiniClin_tumor_SqCC, cut=2, episode = "tgroup")

fig2_SqCC_timespl.cox <- coxph(Surv(tstart, OS_years_5_years_trunk_20190329, event==1) ~ giniGroup:strata(tgroup), data=fig2_SqCC_timespl, ties="efron")
summary(fig2_SqCC_timespl.cox)
Call:
coxph(formula = Surv(tstart, OS_years_5_years_trunk_20190329, 
    event == 1) ~ giniGroup:strata(tgroup), data = fig2_SqCC_timespl, 
    ties = "efron")

  n= 109, number of events= 28 

                                                coef exp(coef) se(coef)      z Pr(>|z|)  
giniGroupAbove median:strata(tgroup)tgroup=1  1.7134    5.5476   0.7602  2.254   0.0242 *
giniGroupUnder median:strata(tgroup)tgroup=1      NA        NA   0.0000     NA       NA  
giniGroupAbove median:strata(tgroup)tgroup=2 -0.9789    0.3757   0.6018 -1.627   0.1038  
giniGroupUnder median:strata(tgroup)tgroup=2      NA        NA   0.0000     NA       NA  
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

                                             exp(coef) exp(-coef) lower .95 upper .95
giniGroupAbove median:strata(tgroup)tgroup=1    5.5476     0.1803    1.2503    24.615
giniGroupUnder median:strata(tgroup)tgroup=1        NA         NA        NA        NA
giniGroupAbove median:strata(tgroup)tgroup=2    0.3757     2.6615    0.1155     1.222
giniGroupUnder median:strata(tgroup)tgroup=2        NA         NA        NA        NA

Concordance= 0.652  (se = 0.039 )
Likelihood ratio test= 10.24  on 2 df,   p=0.006
Wald test            = 7.73  on 2 df,   p=0.02
Score (logrank) test = 9.3  on 2 df,   p=0.01

without sensoring

fig2SqCC_NoCensor_test <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ giniGroup, data = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ])

print(fig2SqCC_NoCensor_test)
Call: survfit(formula = Surv(OS_years_20190329, status_out_20190329) ~ 
    giniGroup, data = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == 
    "SqCC", ])

                        n events median 0.95LCL 0.95UCL
giniGroup=Under median 26     21   6.38    3.50    8.97
giniGroup=Above median 36     22   7.05    1.96      NA
summary(fig2SqCC_NoCensor_test)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL  0.95UCL
giniGroup=Under median      26    26      26     21 6.777249 0.7798128 6.384668 3.501711 8.969199
giniGroup=Above median      36    36      36     22 7.032550 0.8931418 7.052704 1.960301       NA
ggsurvplot(fig2SqCC_NoCensor_test,
          pval = TRUE,
          palette = c("#e64b35", "#4dbbd5"),
          break.time.by = 1,
          ggtheme = theme_classic(),
          legend.labs = c("Above median", "Under median"),
          risk.table = "abs_pct",
          risk.table.y.text = FALSE,
          xlab = "Time (years)",
          title = "giniSurv_without_censoring",
          axes.offset = FALSE,
          risk.table.fontsize = 2.5,
          legend = c(0.9,0.9),
          risk.table.title = "Number at risk by time: n (%)",
          font.main = c())

Table: Cox regression

Single variate

fig2.cox <- coxph(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor)

print("===")
[1] "==="
fig2.cox
Call:
coxph(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_tumor)

                         coef exp(coef) se(coef)      z    p
giniGroupAbove median -0.1166    0.8900   0.2165 -0.539 0.59

Likelihood ratio test=0.29  on 1 df, p=0.5898
n= 182, number of events= 86 
print("===")
[1] "==="
print(summary(fig2.cox))
Call:
coxph(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_tumor)

  n= 182, number of events= 86 

                         coef exp(coef) se(coef)      z Pr(>|z|)
giniGroupAbove median -0.1166    0.8900   0.2165 -0.539     0.59

                      exp(coef) exp(-coef) lower .95 upper .95
giniGroupAbove median      0.89      1.124    0.5822      1.36

Concordance= 0.503  (se = 0.028 )
Likelihood ratio test= 0.29  on 1 df,   p=0.6
Wald test            = 0.29  on 1 df,   p=0.6
Score (logrank) test = 0.29  on 1 df,   p=0.6
print("===")
[1] "==="
cox.zph(fig2.cox)
          chisq df     p
giniGroup  5.22  1 0.022
GLOBAL     5.22  1 0.022
# Merge the histology group except AC and SqCC to others
GiniClin_tumor[!(GiniClin_tumor$histology_Hans_B_WHO_2015 %in% c("AC","SqCC")), "Histology"] <- "Other"
GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", "Histology"] <- "AC"
GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", "Histology"] <- "SqCC"
# Merge the current smoke and former smoke in to one group
GiniClin_tumor[GiniClin_tumor$smoke %in% c("Current smoke","Former smoke"), "smokeGroup"] <- "Current/Former"
GiniClin_tumor[GiniClin_tumor$smoke == "Never smoke", "smokeGroup"] <- "Never"
# Construct levels of each parameter
GiniClin_tumor$stageGroup = factor(GiniClin_tumor$stageGroup, levels = c("Low stage", "High stage"))
GiniClin_tumor$gender = factor(GiniClin_tumor$gender, levels = c("Male", "Female"))
GiniClin_tumor$Histology = factor(GiniClin_tumor$Histology, levels = c("AC", "SqCC", "Other"))
GiniClin_tumor$ageGroup = factor(GiniClin_tumor$ageGroup, levels = c("Under 70", "Above 70"))
GiniClin_tumor$smokeGroup = factor(GiniClin_tumor$smokeGroup, levels = c("Never","Current/Former"))

Multi-variate cox regression

fig2.multicox <- coxph(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup + ageGroup + gender + smokeGroup + Histology + stageGroup, data = GiniClin_tumor)

summary(fig2.multicox)
Call:
coxph(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup + ageGroup + gender + smokeGroup + Histology + 
        stageGroup, data = GiniClin_tumor)

  n= 182, number of events= 86 

                             coef exp(coef) se(coef)      z Pr(>|z|)   
giniGroupAbove median    -0.04817   0.95298  0.22266 -0.216  0.82874   
ageGroupAbove 70          0.23155   1.26055  0.21842  1.060  0.28910   
genderFemale             -0.24394   0.78354  0.22166 -1.101  0.27110   
smokeGroupCurrent/Former  0.23605   1.26624  0.39017  0.605  0.54518   
HistologySqCC            -0.18836   0.82832  0.24177 -0.779  0.43592   
HistologyOther           -0.20626   0.81362  0.44507 -0.463  0.64305   
stageGroupHigh stage      0.58274   1.79095  0.22201  2.625  0.00867 **
---
Signif. codes:  0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1

                         exp(coef) exp(-coef) lower .95 upper .95
giniGroupAbove median       0.9530     1.0493    0.6160     1.474
ageGroupAbove 70            1.2605     0.7933    0.8216     1.934
genderFemale                0.7835     1.2763    0.5074     1.210
smokeGroupCurrent/Former    1.2662     0.7897    0.5894     2.720
HistologySqCC               0.8283     1.2073    0.5157     1.330
HistologyOther              0.8136     1.2291    0.3401     1.947
stageGroupHigh stage        1.7909     0.5584    1.1591     2.767

Concordance= 0.616  (se = 0.028 )
Likelihood ratio test= 10.84  on 7 df,   p=0.1
Wald test            = 10.91  on 7 df,   p=0.1
Score (logrank) test = 11.15  on 7 df,   p=0.1

Without adjuvent treatment

# Subset the group without adjuvent treatment
GiniClin_noTreat = subset(GiniClin_tumor, Annette_adjuvant == 0)

GiniClin_noTreat = as.data.frame(GiniClin_noTreat)

GiniClin_noTreat$giniGroup = factor(GiniClin_noTreat$giniGroup, levels = c("Above median", "Under median"))

# construct the object
fig8test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_noTreat)

print(fig8test)
Call: survfit(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_noTreat)

                        n events median 0.95LCL 0.95UCL
giniGroup=Above median 48     18     NA      NA      NA
giniGroup=Under median 37     17     NA    3.55      NA
summary(fig8test)$table
                       records n.max n.start events    rmean se(rmean) median  0.95LCL 0.95UCL
giniGroup=Above median      48    48      48     18 3.814767 0.2603937     NA       NA      NA
giniGroup=Under median      37    37      37     17 3.830473 0.2522535     NA 3.545517      NA

# Subset the group without adjuvent treatment
GiniClin_Treat = subset(GiniClin_tumor, Annette_adjuvant == 1)

GiniClin_Treat = as.data.frame(GiniClin_Treat)

GiniClin_Treat$giniGroup = factor(GiniClin_Treat$giniGroup, levels = c("Above median", "Under median"))

# construct the object
fig8test_2 <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_Treat)

print(fig8test_2)
Call: survfit(formula = Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ 
    giniGroup, data = GiniClin_Treat)

                        n events median 0.95LCL 0.95UCL
giniGroup=Above median 39     19     NA     3.2      NA
giniGroup=Under median 45     23   4.82     3.5      NA
summary(fig8test_2)$table
                       records n.max n.start events    rmean se(rmean)   median  0.95LCL 0.95UCL
giniGroup=Above median      39    39      39     19 3.523193 0.2922509       NA 3.195072      NA
giniGroup=Under median      45    45      45     23 3.814769 0.2146622 4.824093 3.498973      NA

Figure mutation burden

Mutation burden

mutClin <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "MutationBurdern_all_tumor")

mutClin <- as.data.frame(mutClin, row.names = mutClin$ID)

mutClin$AB_GiniIndex <- as.numeric(mutClin$AB_GiniIndex)
mutClin$Mutation_load_allmutations <- as.numeric(mutClin$Mutation_load_allmutations)
mutClin <- mutClin[intersect(rownames(mutClin), rownames(GiniClin_tumor)),]
fig4a <- ggscatter(mutClin[,c("AB_GiniIndex","Mutation_load_allmutations")], x = "AB_GiniIndex", y = "Mutation_load_allmutations",
                   add="reg.line", add.params = list(color = "#00bfc4"),
                   conf.int = FALSE,
                   cor.coef = TRUE,
                   cor.coeff.args = list(method = "spearman", label.x.npc = 0.15, label.y.npc = 0.85),
                   ggtheme = theme_classic(),
                   #add = "reg.line", add.params = c(color = '#00bfc4'),
                   title = "Mutation burden", xlab = "Gini Index", ylab = "Mutation load of all mutations",
                   font.tickslab = c(size=15, color = "black"), font.main = 20,font.y = 15, font.x = 15) +
  scale_y_continuous(expand = c(0,0), limits = c(0, 130), breaks = c(seq(0,120,30))) +
  scale_x_continuous(expand = c(0,0), limits = c(0, 0.75), breaks = c(seq(0,0.6,0.2)))

fig4a
`geom_smooth()` using formula 'y ~ x'

Mutation

for (mutGene in colnames(mutGini[,(3:84)])){
  WT_buff <- mutGini[mutGini[,mutGene] == "WT","GiniIndex"]
  Mut_buff <- mutGini[mutGini[,mutGene] == "Mutated","GiniIndex"]
  mutRes <- wilcox.test(WT_buff,Mut_buff)
  mutTest <- c(mutTest,c(mutGene,mutRes$statistic,mutRes$p.value))
}
Error in h(simpleError(msg, call)) : 
  error in evaluating the argument 'x' in selecting a method for function 'colnames': undefined columns selected

EGFR

APC

f4c.apc <- ggplot(mutGini, aes(x = APC, y = GiniIndex, fill=APC)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "APC", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.apc

TP53

f4c.tp53 <- ggplot(mutGini, aes(x = TP53, y = GiniIndex, fill=TP53)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "TP53", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.tp53

ARID1A

f4c.arid1a <- ggplot(mutGini, aes(x = ARID1A, y = GiniIndex, fill=ARID1A)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "ARID1A", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.arid1a

EPHB6

f4c.ephb6 <- ggplot(mutGini, aes(x = EPHB6, y = GiniIndex, fill=EPHB6)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "EPHB6", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.ephb6

CSMD3

f4c.csmd3 <- ggplot(mutGini, aes(x = CSMD3, y = GiniIndex, fill=CSMD3)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "CSMD3", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20, hjust = 0.5)) +
  #, panel.border = element_rect(color = "black", size = 1, fill = NA)
  stat_compare_means(paired = FALSE)
f4c.csmd3

Combine

ggarrange(f4c.egfr, f4c.apc, f4c.tp53, f4c.arid1a, f4c.ephb6, f4c.csmd3, ncol = 2, nrow = 3, align = "hv")

Figure IHC Heatmap

prepare table

ihcAnno <- merge(ihcAnno, mut_t_anno[,c("APC","ARID1A","EPHB6")],by = "row.names", all=FALSE)
Error in h(simpleError(msg, call)) : 
  error in evaluating the argument 'y' in selecting a method for function 'merge': undefined columns selected
ihcAnno_color = list(
  "Gini index" = colorRampPalette(brewer.pal(n = 10, name = "Reds"))(100),
  "KRAS" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "EGFR" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "PIK3CA" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "FGFR2"= c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "PDGFRA"= c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "ROS1"= c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "NF1" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "STK11" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "TP53" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "KEAP1" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "NFE2L2" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "MUC16" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "KMT2C" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "KMT2D" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "SMARCA4" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "CDKN2A" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "CREBBP" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "CSMD3" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "LRP1B" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "APC" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "ARID1A" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "EPHB6" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "histology" = c(AC = "#D55740", SqCC = "#4DBCD6", AdSq = "#479E88", LCC = "#415384", LCNEC = "#F49B7F", SC = "#F8BED6"),
  "gender" = c(Male = "#B9E3DF", Female = "#EFC0D5"),
  "smoke" = c(Current = "#D45F51", Former = "#E68840", Never = "#BCD9B2"),
  "surv5years" = c(Dead = "#D45F51", Alive = "#B7D2E8")
)
Warning: n too large, allowed maximum for palette Reds is 9
Returning the palette you asked for with that many colors
fig5b <- pheatmap(t(ihcHeatT_tumor_norm2), clustering_method = "ward.D2",
                  clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
                  cutree_cols = 3,
                  #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
                  annotation_col = ihcAnno,
                  annotation_colors = ihcAnno_color,
                  fontsize = 5,
                  show_colnames = F
                  )
fig5b
fig5b.cluster <- fig5b$tree_col

fig5b.cluster.plot <- plot(fig5b.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig5b.cut <- cutree(fig5b.cluster, 3)

fig5b.cut_1 <- names(fig5b.cut)[fig5b.cut == 1]
fig5b.cut_2 <- names(fig5b.cut)[fig5b.cut == 2]
fig5b.cut_3 <- names(fig5b.cut)[fig5b.cut == 3]

fig5b.cut_gini <- merge(AB_Gini_anno, fig5b.cut, by = "row.names", all.y = TRUE)

fig5b.cut_gini <- as.data.frame(fig5b.cut_gini, row.names = fig5b.cut_gini$Row.names)

fig5b.cut_gini$Row.names <- NULL

fig5b.cut_gini <- rename(fig5b.cut_gini, c("y"="Cluster"))
fig5b.cut_gini[fig5b.cut_gini$Cluster == 2, "Class"] <- "Immune hot"
fig5b.cut_gini[fig5b.cut_gini$Cluster == 3, "Class"] <- "Median"
fig5b.cut_gini[fig5b.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5b.cut_gini$Class = factor(fig5b.cut_gini$Class, levels = c("Immune hot", "Median", "Immune cold"))
fig5b.cut_gini <- as.data.frame(fig5b.cut_gini)
fig5b.cut_gini2 <- rename(fig5b.cut_gini, c("Gini index"="GiniIndex"))
compare_means(GiniIndex ~ Class, data = fig5b.cut_gini2)
fig5b.cut_km <- merge(fig5b.cut_gini, GiniClin_tumor[, c("OS_years_5_years_trunk_20190329", "status_5_years_trunk_20190329")], by = "row.names")
fig5b.cut_km <- as.data.frame(fig5b.cut_km, row.names = fig5b.cut_km$Row.names)
fig5b.cut_km$Row.names <- NULL

fig5b.test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ Class, data = fig5b.cut_km)

summary(fig5b.test)$table
fig5b.KM <- ggsurvplot(fig5b.test,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5", "#999999"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        #legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv_Class",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())
fig5b.KM

fig5b.cut_clin <- merge(fig5b.cut_km, GiniClin_tumor[,c("histology_Hans_B_WHO_2015", "gender", "smoke")], by = "row.names", all.x = TRUE)

fig5b.cut_clin <- as.data.frame(fig5b.cut_clin, row.names = fig5b.cut_clin$Row.names)

fig5b.cut_clin$Row.names <- NULL

fig5b.cut_clin_AC <- fig5b.cut_clin[fig5b.cut_clin$histology_Hans_B_WHO_2015 == "AC", ]
fig5b.cut_clin_SqCC <- fig5b.cut_clin[fig5b.cut_clin$histology_Hans_B_WHO_2015 == "SqCC", ]

fig5b.cut_clin

fig5b.cut_clin[fig5b.cut_clin$`Gini index` >= 0.25431, "giniGroup"] <- "Above median"
fig5b.cut_clin[fig5b.cut_clin$`Gini index` < 0.25431, "giniGroup"] <- "Under median"
fig5b.cut_clin$giniGroup = factor(fig5b.cut_clin$giniGroup, levels = c("Above median", "Under median"))

cluster without gini index

pheatmap(t(ihcHeatT_tumor_norm2[,(2:25)]), clustering_method = "ward.D2",
                  clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
                  cutree_cols = 3,
                  #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
                  annotation_col = ihcAnno,
                  annotation_colors = ihcAnno_color,
                  fontsize = 5,
                  show_colnames = F
                  )

pheatmap(t(ihcHeatT_tumor_norm3[,grep("_S", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 3,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
pheatmap(t(ihcHeatT_tumor_norm3[,grep("_S", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 3,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
pheatmap(t(ihcHeatT_tumor_norm3[,grep("_T", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 2,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
fig5d_T <- pheatmap(t(ihcHeatT_tumor_norm3[,grep("_T", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 2,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
fig5d_T
fig5d_T.cluster <- fig5d_T$tree_col

fig5d_T.cluster.plot <- plot(fig5d_T.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)


fig5d_T.cut <- cutree(fig5d_T.cluster, 2)

fig5d_T.cut_1 <- names(fig5d_T.cut)[fig5d_T.cut == 1]
fig5d_T.cut_2 <- names(fig5d_T.cut)[fig5d_T.cut == 2]

fig5d_T.cut_gini <- merge(AB_Gini_anno, fig5d_T.cut, by = "row.names", all.y = TRUE)

fig5d_T.cut_gini <- as.data.frame(fig5d_T.cut_gini, row.names = fig5d_T.cut_gini$Row.names)

fig5d_T.cut_gini$Row.names <- NULL

fig5d_T.cut_gini <- rename(fig5d_T.cut_gini, c("Cluster"="y"))
fig5d_T.cut_gini[fig5d_T.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5d_T.cut_gini[fig5d_T.cut_gini$Cluster == 2, "Class"] <- "Immune hot"
fig5d_T.cut_gini$Class = factor(fig5d_T.cut_gini$Class, levels = c("Immune hot", "Immune cold"))
fig5d_T.box <- ggplot(fig5d_T.cut_gini, aes(x = Class, y = GiniIndex, fill=Class)) +
    geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
    theme_classic() +
    # coord_flip() +
    scale_fill_manual(values = c("#e64b35", "#4dbbd5")) +
    labs(title = "Class", x = "Class") +
    theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
    stat_compare_means(paired = NULL)

fig5d_T.box

fig5d_T.cut_km <- merge(fig5d_T.cut_gini, GiniClin_tumor[, c("OS_years_5_years_trunk_20190329", "status_5_years_trunk_20190329")], by = "row.names")
fig5d_T.cut_km <- as.data.frame(fig5d_T.cut_km, row.names = fig5d_T.cut_km$Row.names)
fig5d_T.cut_km$Row.names <- NULL

fig5d_T.test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ Class, data = fig5d_T.cut_km)

summary(fig5d_T.test)$table
                  records n.max n.start events    rmean se(rmean) median  0.95LCL 0.95UCL
Class=Immune hot       39    39      39     18 3.704189 0.2816978     NA 4.388775      NA
Class=Immune cold     138   138     138     65 3.737826 0.1381891     NA 4.276523      NA
ggsurvplot(fig5d_T.test,
         pval = TRUE,
         palette = c("#e64b35", "#4dbbd5", "#999999"),
         break.time.by = 1,
         ggtheme = theme_classic(),
         #legend.labs = c("Above median", "Under median"),
         risk.table = "abs_pct",
         risk.table.y.text = FALSE,
         xlab = "Time (years)",
         title = "giniSurv_Class",
         xlim = c(0,5.2),
         axes.offset = FALSE,
         risk.table.fontsize = 2.5,
         legend = c(0.9,0.9),
         risk.table.title = "Number at risk by time: n (%)",
         font.main = c())

fig5d_S.cluster <- fig5d_S$tree_col

fig5d_S.cluster.plot <- plot(fig5d_S.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig5d_S.cut <- cutree(fig5d_S.cluster, 3)

fig5d_S.cut_1 <- names(fig5d_S.cut)[fig5d_S.cut == 1]
fig5d_S.cut_2 <- names(fig5d_S.cut)[fig5d_S.cut == 2]
fig5d_S.cut_3 <- names(fig5d_S.cut)[fig5d_S.cut == 3]

fig5d_S.cut_gini <- merge(AB_Gini_anno, fig5d_S.cut, by = "row.names", all.y = TRUE)

fig5d_S.cut_gini <- as.data.frame(fig5d_S.cut_gini, row.names = fig5d_S.cut_gini$Row.names)

fig5d_S.cut_gini$Row.names <- NULL

fig5d_S.cut_gini <- rename(fig5d_S.cut_gini, c("Cluster"="y"))
fig5d_S.cut_gini[fig5d_S.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5d_S.cut_gini[fig5d_S.cut_gini$Cluster == 2, "Class"] <- "Immune median"
fig5d_S.cut_gini[fig5d_S.cut_gini$Cluster == 3, "Class"] <- "Immune hot"
fig5d_S.cut_gini$Class = factor(fig5d_S.cut_gini$Class, levels = c("Immune hot", "Median", "Immune cold"))

fig5d_S.cut_km <- merge(fig5d_S.cut_gini, GiniClin_tumor[, c("OS_years_5_years_trunk_20190329", "status_5_years_trunk_20190329")], by = "row.names")
fig5d_S.cut_km <- as.data.frame(fig5d_S.cut_km, row.names = fig5d_S.cut_km$Row.names)
fig5d_S.cut_km$Row.names <- NULL

fig5d_S.test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ Class, data = fig5d_S.cut_km)

summary(fig5d_S.test)$table
                  records n.max n.start events    rmean se(rmean) median  0.95LCL 0.95UCL
Class=Immune hot       36    36      36     14 3.885352 0.2702259     NA 4.544832      NA
Class=Median           62    62      62     30 3.669780 0.2112939     NA 3.556468      NA
Class=Immune cold      79    79      79     39 3.707397 0.1864731     NA 4.010951      NA
ggsurvplot(fig5d_S.test,
         pval = TRUE,
         palette = c("#e64b35", "#4dbbd5", "#999999"),
         break.time.by = 1,
         ggtheme = theme_classic(),
         #legend.labs = c("Above median", "Under median"),
         risk.table = "abs_pct",
         risk.table.y.text = FALSE,
         xlab = "Time (years)",
         title = "giniSurv_Class",
         xlim = c(0,5.2),
         axes.offset = FALSE,
         risk.table.fontsize = 2.5,
         legend = c(0.9,0.9),
         risk.table.title = "Number at risk by time: n (%)",
         font.main = c())
ihcHeatT_tumor_norm4 <- rename_with(ihcHeatT_tumor_norm4, gsub("SQ1", "_"))
Error in is.factor(x) : argument "x" is missing, with no default

fig5d_ST.cut_gini[fig5d_ST.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5d_ST.cut_gini[fig5d_ST.cut_gini$Cluster == 2, "Class"] <- "Immune hot"
fig5d_ST.cut_gini$Class = factor(fig5d_ST.cut_gini$Class, levels = c("Immune hot", "Immune cold"))
fig5d_ST.cut_gini <- as.data.frame(fig5d_ST.cut_gini)
fig5d_ST.cut_gini <- rename(fig5d_ST.cut_gini, c("GiniIndex"="Gini index"))
compare_means(GiniIndex ~ Class, data = fig5d_ST.cut_gini)

Ordered by Gini

pheatmap(t(ihcHeatT_tumor_norm4[order(ihcHeatT_tumor_norm4$GiniIndex),c(2:12)]),
         #clustering_method = "ward.D2",
         cluster_rows = F, cluster_cols = F,
         cutree_cols = 2,
         #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 8,
         show_colnames = F)

IHC correlation plot

fig6 <- corrplot(data.matrix(ihcCorR), method = "circle",col = rev(COL2('RdBu', 200)),
                 p.mat = data.matrix(ihcCorP), insig = "label_sig", sig.level = c(.001, .01, .05), pch.cex = 0.8,
                 tl.col = "black")
fig6
ggplot(ihcCorP_melt, aes(var1, var2, fill = Corr)) +
  geom_point(shape=21, colour = "black", size = 12)+
  theme_classic()+
  scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Correlation", x = "IHC marker", y = "Position") +
  theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
  geom_text(aes(label = label), size = 5, colour = "black")
Warning: Removed 10 rows containing missing values (geom_text).

Figure 9 Lymphotrack cluster plot

L481

L481_L <- as.data.frame(L481_L)

L481_R <- as.data.frame(L481_R)

L481_all <- as.data.frame(L481_all)

L481_L["Class"] <- "Lymphotrack"

L481_R["Class"] <- "RNAseq"
npj.pal = pal_npg("nrc", alpha = 0.9)(10)
simp.pal = pal_simpsons("springfield")(16)
nejm.pal = pal_nejm("default", alpha = 0.9)(8)
jama.pal = pal_jama("default",alpha = 0.9)(7)
fig9_pal = c(c("#E0E3E3FF"), npj.pal, simp.pal, nejm.pal, jama.pal)

fig9_L481 <- ggplot(L481_all, aes(x= Type, y= Fraction, fill= Clone)) +
  geom_col(position = "stack", width = 0.6) +
  theme_classic() +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(values = fig9_pal) +
  scale_y_break(c(40,95), scales = 0.2, ticklabels = c(95,100))


gg.gap(plot = fig9_L481,
       segments = c(40,80),
       ylim = c(0, 100),
       tick_width = c(5, 10),
       rel_heights = c(0.3, 0, 0.1)
       )

ggplot(L481_L, aes(x=Class, y= Fraction, fill= reorder(Lymphotrack, -Fraction))) +
     geom_col(position = "stack", width = 0.6) +
     theme_classic() +
     scale_fill_manual(values = fig9_pal) +
     scale_y_break(c(7,92), scales = 0.2, ticklabels = c(95,100), expand = FALSE) +
     scale_y_continuous(expand = c(0,0), breaks = c(0,2,4,6))

ggplot(L481_R, aes(x=Class, y= Fraction, fill= reorder(RNAseq, -Fraction))) +
     geom_col(position = "stack", width = 0.6) +
     theme_classic() +
     scale_fill_manual(values = fig9_pal) +
     scale_y_break(c(40,92), scales = 0.2, ticklabels = c(95,100), expand = FALSE) +
     scale_y_continuous(expand = c(0,0), breaks = c(0,10,20,30,40))

Fig10 Vocanol plot

fig10 <- ggplot(data = diffGene, aes(x = log2FoldChange, y = -log10(padj), color = sign)) + 
  geom_point(size = 1) +  #Create scatter plot
  scale_color_manual(values = c('red', 'gray', 'green'), limits = c('UP', 'none', 'DOWN')) +  #Set the color
  labs(x = 'log2 Fold Change', y = '-log10 adjust p-value', title = 'High clonality vs Low clonality', color = '') +  #Set the labels
  theme(plot.title = element_text(hjust = 0.5, size = 14), panel.grid = element_blank(), #Change the them and grid lines
  panel.background = element_rect(color = 'black', fill = 'transparent'), 
  legend.key = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = 'black') +  #Add threshold line
  geom_hline(yintercept = 2, lty = 3, color = 'black') +
  xlim(-12, 12) + ylim(0, 35) + #Set the curve boundary
  geom_text_repel(
    data = diffGene,
    aes(label = label),
    size = 3, segment.color = "blue", show.legned = FALSE,
    point.padding = unit(0.8, "lines")
  )
Warning: Ignoring unknown parameters: `show.legned`

#Figure11 Phenotype

fig11
Warning: Removed 16 rows containing missing values (geom_text).

Cluster

All gini

This method start from the t cell pheno type data to check if the gini index is linked with these group. But there is also a angle that start from Gini index (25%) to analyze the t cell composition changes

phenoClin_gini3 <- phenoClin_gini2[, grep("Pan", colnames(phenoClin_gini2), invert = T)]
Error: unexpected symbol in:
"phenoClin_gini3 <- phenoClin_gini2[, grep("Pan", colnames(phenoClin_gini2), invert = T)
phenoClin_gini3"
min.max.norm <- function(x){
  (x - min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

phenoClin_gini3.norm2 <- apply(phenoClin_gini3[, c(2:40)], 2, min.max.norm)
phenoClin_gini3.norm2 <- as.data.frame(phenoClin_gini3.norm2)
phenoClin_gini3.norm2 <- cbind(phenoClin_gini3[,1], phenoClin_gini3.norm2)
colnames(phenoClin_gini3.norm2)[colnames(phenoClin_gini3.norm2) == "phenoClin_gini3[, 1]"] = "GiniIndex"
pheatmap(t(phenoClin_gini2[,grep("st_", colnames(phenoClin_gini2), invert = T)]), clustering_method = "ward.D2",
        clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
        cutree_cols = 2,
        #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
        annotation_col = ihcAnno,
        annotation_colors = ihcAnno_color,
        fontsize = 5,
        show_colnames = F)

pheatmap(t(phenoClin_gini3.norm[,c(1,grep("st_", colnames(phenoClin_gini3.norm[,c(2:16)]), invert = T)+1)]), clustering_method = "ward.D2",
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
         cutree_cols = 2,
         #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)
pheatmap(t(phenoClin_gini3.norm2[,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:16)]), invert = T)]), clustering_method = "ward.D2",
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
         cutree_cols = 2,
         #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)

pheatmap(t(phenoClin_gini3.norm2[,grep("st_", colnames(phenoClin_gini3.norm2[,c(2:16)]))]), clustering_method = "ward.D2",
          cluster_rows = F, clustering_distance_cols = "euclidean",
          cutree_cols = 5,
          #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F)

pheatmap(t(phenoClin_gini3.norm2[,grep("st_", colnames(phenoClin_gini3.norm2[,c(2:13)]))+1]), clustering_method = "ward.D2",
         cluster_rows = F, clustering_distance_cols = "euclidean",
         cutree_cols = 4,
         #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 10,
         show_colnames = F)

fig11c.cut_gini <- merge(phenoClin_gini3.norm2[!(rownames(phenoClin_gini3.norm2) == "L464T"),c(1,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:14)])))],
                         fig11c.cut, by = "row.names", all.y = TRUE)
fig11c.cut_gini <- as.data.frame(fig11c.cut_gini, row.names = fig11c.cut_gini$Row.names)
fig11c.cut_gini$Row.names <- NULL
colnames(fig11c.cut_gini)[colnames(fig11c.cut_gini) == "y"] <- "CutGroup"
fig11c.cut_gini$CutGroup = factor(fig11c.cut_gini$CutGroup)
ggplot(fig11c.cut_gini, aes(x = CutGroup, y = GiniIndex, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "Gini", x = "Group") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

ggplot(fig11c.cut_gini, aes(x = CutGroup, y = s_CD4_Treg, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "CD4_Treg", x = "Group") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

ggplot(fig11c.cut_gini.melt, aes(x = variable, y = value, fill=variable)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "T cell phenotype") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  facet_wrap(~CutGroup,ncol = 1)

Smooth line

fig11c.cut_gini.melt2 <- melt(cbind("ID"=rownames(fig11c.cut_gini),fig11c.cut_gini[,c(1:5)]), na.rm = F, id.vars = c("ID", "GiniIndex"), variable.factor =F)

fig11c.cut_gini.melt2$variable <- as.factor(fig11c.cut_gini.melt2$variable)

# Smooth value trend line
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value)) +
  geom_smooth(aes(color=variable,fill=variable), method = "loess") +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw stacking value
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area() +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw fraction change
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# smooth density value
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area(fill=NA) +
  stat_smooth(geom = "area", method = "loess", span = 1/3, alpha=0.4)+
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smooth stacking density value
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.5) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smooth fraction changes
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))+
    scale_x_continuous(expand = c(0,0), limits = c(0,0.8)) + scale_y_continuous(expand = c(0,0), limits = c(0,1))


# Raw fraction change of unnormalized data
phenoClin_gini3.melt <- melt(cbind("ID"=rownames(phenoClin_gini3),phenoClin_gini3[,c(1,grep("st_", colnames(phenoClin_gini3[,c(1:13)])))]),
                             na.rm = F, id.vars = c("ID", "GiniIndex"), variable.factor =F)

ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw stacking of unnormalized data
ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "stack") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smooth fraction change of unnormalized data
ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))+
    scale_y_continuous(expand = c(0,0), limits = c(0,1))

# Smooth stacking unnormalized data
ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))
fig11_gini <- ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = 1)) + geom_vline(xintercept=fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$variable == "st_CD4_Eff", "GiniIndex"]) + clean_theme(panel.background = element_blank())
Error in clean_theme(panel.background = element_blank()) : 
  unused argument (panel.background = element_blank())

Ordered by gini

pheatmap(t(phenoClin_gini3.norm2[order(phenoClin_gini3.norm2$GiniIndex),c(2:40)]),
         clustering_method = "ward.D2",
         cluster_rows = F, clustering_distance_cols = "euclidean",
         cutree_cols = 4,
         #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)

pheatmap(t(phenoClin_gini3.norm2[order(phenoClin_gini3.norm2$GiniIndex),c(17:31)]),
         clustering_method = "ward.D2",
         cluster_rows = F, cluster_cols = F,
         cutree_cols = 4,
         #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)

High gini group

fig11d.cut_gini <- merge(phenoClin_gini3.norm2[!(rownames(phenoClin_gini3.norm2) == "L464T"),c(1,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:14)])))],
                         fig11d.cut, by = "row.names", all.y = TRUE)
fig11d.cut_gini <- as.data.frame(fig11d.cut_gini, row.names = fig11d.cut_gini$Row.names)
fig11d.cut_gini$Row.names <- NULL
colnames(fig11d.cut_gini)[colnames(fig11d.cut_gini) == "y"] <- "CutGroup"
fig11d.cut_gini$CutGroup = factor(fig11d.cut_gini$CutGroup)
fig11d.cut_gini.melt <- melt(fig11d.cut_gini, na.rm = F)
Using CutGroup as id variables
# Raw stacking change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "stack") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed stacking change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw fraction change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed fraction change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw stacking value (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area() +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw fraction value (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed stacking change (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.5) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed fraction change (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.5) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

Fig12

PhenoAll <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "All")
PhenoAllNoCK <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "noCK")
PhenoTIL <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "TIL")
PhenoNK <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "NK")
PhenoAPC <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "APC")

PhenoAll <- as.data.frame(PhenoAll)
PhenoAllNoCK <- as.data.frame(PhenoAllNoCK)
PhenoTIL <- as.data.frame(PhenoTIL)
PhenoNK <- as.data.frame(PhenoNK)
PhenoAPC <- as.data.frame(PhenoAPC)

PhenoAll$Location <- factor(PhenoAll$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoAllNoCK$Location <- factor(PhenoAllNoCK$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoTIL$Location <- factor(PhenoTIL$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoNK$Location <- factor(PhenoNK$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoAPC$Location <- factor(PhenoAPC$Location, levels = c("Tumor", "Stroma", "SAndT"))

PhenoAll$PhenoType <- factor(PhenoAll$PhenoType, levels = c("CD4_Eff","CD4_Treg","CD8_Eff","CD8_Treg","B_cells","NK_cells","NKT_cells","M1","M2","CD163","iDC","mDC","pDC","PanCKsingle_TIL", "PanCKsingle_NK","PanCKsingle_APC"))
PhenoAllNoCK$PhenoType <- factor(PhenoAllNoCK$PhenoType, levels = c("CD4_Eff","CD4_Treg","CD8_Eff","CD8_Treg","B_cells","NK_cells","NKT_cells","M1","M2","CD163","iDC","mDC","pDC"))
PhenoTIL$PhenoType <- factor(PhenoTIL$PhenoType, levels = c("CD4_Eff","CD4_Treg","CD8_Eff","CD8_Treg","B_cells","PanCKsingle_TIL"))
PhenoNK$PhenoType <- factor(PhenoNK$PhenoType, levels = c("NK_cells","NKT_cells","M1","M2","CD163","PanCKsingle_NK"))
PhenoAPC$PhenoType <- factor(PhenoAPC$PhenoType, levels = c("iDC","mDC","pDC","PanCKsingle_APC"))
fig12a <- ggplot(PhenoAllNoCK, aes(x = PhenoType, y = value, fill=giniGroup)) +
    geom_boxplot(outlier.shape = 16, outlier.size = 2, ) +
    theme_classic() +
    # coord_flip() +
    # scale_fill_npg() +
    theme(legend.position = "none") +
    stat_compare_means(paired = NULL, size=2) +
    facet_wrap(~Location, nrow = 3, strip.position = "left", scale="free") +
    scale_y_continuous(expand = c(0,0))


fig12b <- ggplot(PhenoAllNoCK, aes(x = Location, y = value, fill=giniGroup)) +
    geom_boxplot(outlier.shape = 16, outlier.size = 2, ) +
    theme_classic() +
    # coord_flip() +
    # scale_fill_npg() +
    theme(legend.position = "none") +
    stat_compare_means(paired = NULL, size=1.5) +
    facet_wrap(~PhenoType, nrow = 3, strip.position = "left", scale="free")

Fig13 Distance data

DistTIL <- read_excel("~/Project/TCR/Distance/BOMI2_dist.xlsx", sheet = "TIL_Gini")
DistTIL <- as.data.frame(DistTIL)
TIL_corrT <- rcorr(as.matrix(DistTIL[,c(5,27:56)]), type = "spearman")
write.csv(TIL_corrT$r, file = "~/Project/TCR/Distance/TIL_corr.csv")
write.csv(TIL_corrT$P, file = "~/Project/TCR/Distance/TIL_corr_Pvalue.csv")

# melt
TIL_corr_melt <- read_excel("~/Project/TCR/Distance/BOMI2_dist.xlsx", sheet = "TIL_Corr")
TIL_corr_melt[,"P_adjust"] <- p.adjust(TIL_corr_melt$`P-Value`,method = "BH")
TIL_corr_melt$label <- NULL
TIL_corr_melt.1d[TIL_corr_melt.1d$var1]
TIL_corr_melt[TIL_corr_melt$P_adjust <= 0.001,"label"] <- "***"
TIL_corr_melt[TIL_corr_melt$P_adjust > 0.001 & TIL_corr_melt$P_adjust <= 0.01,"label"] <- "**"
TIL_corr_melt[TIL_corr_melt$P_adjust > 0.01 & TIL_corr_melt$P_adjust <= 0.05,"label"] <- "*"
TIL_corr_melt$Correlation
ggplot(TIL_corr_melt, aes(var1, var2, fill = Correlation)) +
    geom_point(shape=21, colour = "black", size = 12)+
    theme_classic()+
    scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
    labs(title = "Correlation", x = "Outset", y = "End") +
    theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
    geom_text(aes(label = label), size = 5, colour = "black")

1D plot

TIL_corr_melt.1d <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "TIL_Corr_1D")
TIL_corr_melt.1d <- as.data.frame(TIL_corr_melt.1d)
# -0.122419365  0.138273993
#TIL_corr_melt.1d[TIL_corr_melt.1d$var1 == "CD4_Treg" & TIL_corr_melt.1d$var2 == "PanCKsingle", c("Correlation","P-Value")] <- c(-0.122419365, 0.138273993)
TIL_corr_melt.1d[,"P_adjust"] <- p.adjust(TIL_corr_melt.1d$`P-Value`,method = "BH")
TIL_corr_melt.1d[TIL_corr_melt.1d$P_adjust <= 0.001,"label"] <- "***"
TIL_corr_melt.1d[TIL_corr_melt.1d$P_adjust > 0.001 & TIL_corr_melt.1d$P_adjust <= 0.01,"label"] <- "**"
TIL_corr_melt.1d[TIL_corr_melt.1d$P_adjust > 0.01 & TIL_corr_melt.1d$P_adjust <= 0.05,"label"] <- "*"
ggplot(TIL_corr_melt.1d, aes(var2, var1, fill = Correlation)) +
    geom_point(shape=21, colour = "black", size = 12)+
    theme_classic()+
    scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
    labs(title = "Correlation", x = "Outset", y = "End") +
    theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
    geom_text(aes(label = label), size = 5, colour = "black")

Fig14 immunetherapy treated cohort

library(readxl)
itCohort <- read_excel("~/Project/TCR/ImmuneCohort/GSE126044_gini_ab.xlsx", 
    sheet = "Sheet3")
View(itCohort)

itCohort <- as.data.frame(itCohort)

print(fig14b_test)
Call: survfit(formula = Surv(`Progression free survival (month)`, Status) ~ 
    giniGroup, data = itCohort)

               n events median 0.95LCL 0.95UCL
giniGroup=High 5      3  13.83    13.5      NA
giniGroup=Low  4      4   2.87     1.1      NA
summary(fig14b_test)$table
               records n.max n.start events    rmean se(rmean)    median 0.95LCL 0.95UCL
giniGroup=High       5     5       5      3 14.02667 1.7040449 13.833333    13.5      NA
giniGroup=Low        4     4       4      4  2.60000 0.4980518  2.866667     1.1      NA

surv_median(fig14b_test)
Warning: `select_()` was deprecated in dplyr 0.7.0.
Please use `select()` instead.

Figure 15 Lymph nodes

Beta clone distance

GiniDist <- read.table("/Users/huiyu818/Project/TCR/TCRdist//cloneSummary/beta_tumor_dist_rank.txt")
GiniDist <- as.data.frame(GiniDist)
pheatmap(GiniDist,
         cluster_rows = F, cluster_cols = F,
         #cutree_cols = 3,
         color = colorRampPalette(brewer.pal(n = 7, name = "RdYlBu"))(100),
         fontsize = 5,
         show_colnames = T
)
# Load the annotation
GiniDist_anno <-  read_excel("~/Project/TCR/TCRdist/cloneSummary/beta_tumor.xlsx", sheet = "Distance_DF_2")
GiniDist_anno <- as.data.frame(GiniDist_anno)
rownames(GiniDist_anno) <- GiniDist_anno$clone_id_new
GiniDist_anno_color = list(
#  "GiniIndex" = colorRampPalette(brewer.pal(n = 9, name = "Reds"))(100),
  "Anti_virus" = c(Y = "#D45F51", N = "#B7D2E8"))

pheatmap(GiniDist,
         clustering_method = "ward.D2",
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
         color = colorRampPalette(brewer.pal(n = 7, name = "RdYlBu"))(100),
         annotation_col = GiniDist_anno["Anti_virus"],
         annotation_colors = GiniDist_anno_color,
         fontsize = 5,
         show_colnames = T
         )
pheatmap(t(ihcHeatT_tumor_norm2), clustering_method = "ward.D2",
                  clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
                  cutree_cols = 3,
                  #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
                  annotation_col = ihcAnno,
                  annotation_colors = ihcAnno_color,
                  fontsize = 5,
                  show_colnames = F
                  )

Cancer testis antigens

CTA_list <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "CTA")

CTA_list <- as.data.frame(CTA_list)
Bomi2RC_tumor_norm <- read.csv("~/Project/Clinical/BOMI2/BOMI2_RNAseq_FPKM/RNASeq_counts_tumor_norm.txt", sep="\t")

CTA list 1 (all from database)

setnames(GiniClin_CTA_ori, old =  CTA_list[["Ensgid_1"]], new = CTA_list[["Gene_1"]])
Error in setnames(GiniClin_CTA_ori, old = CTA_list[["Ensgid_1"]], new = CTA_list[["Gene_1"]]) : 
  Items of 'old' not found in column names: [ENSG00000183461, ENSG00000204379, ENSG00000204382, ENSG00000204376, ENSG00000204375, ENSG00000221867, ENSG00000197172, ENSG00000137948, ENSG00000123584, ENSG00000183305, ...]. Consider skip_absent=TRUE.
GiniClin_CTA_ori[1,37]
[1] 2.098581
CTA_corr <- rcorr(as.matrix(GiniClin_CTA_ori[,c(5,37:132)]), type = "spearman")

write.csv(CTA_corr$r, file = "~/Project/TCR/CTA/ctaCor.csv")
write.csv(CTA_corr$P, file = "~/Project/TCR/CTA/ctaPvalue.csv")
CTA_corrT <- rename(CTA_corrT, c("...1"="GeneSymbol"))
Error in `rename()`:
! Can't rename columns that don't exist.
✖ Column `GeneSymbol` doesn't exist.
Backtrace:
 1. dplyr::rename(CTA_corrT, c(...1 = "GeneSymbol"))
 2. dplyr:::rename.data.frame(CTA_corrT, c(...1 = "GeneSymbol"))

CTA list 2 (noval list)

GiniClin_CTA_ori_2 <- merge(GiniClin_tumor, GiniClin_CTA_ori_2, by.x = "ID" ,by.y = "row.names")
setnames(GiniClin_CTA_ori_2, old =  CTA_list[["Ensgid_2"]], new = CTA_list[["Gene_2"]])
Error in setnames(GiniClin_CTA_ori_2, old = CTA_list[["Ensgid_2"]], new = CTA_list[["Gene_2"]]) : 
  NA in 'new' at positions [91, 92, 93, 94, 95, 96]
GiniClin_CTA_ori_2[3,37]
[1] 0.9198992
CTA_corr_2 <- rcorr(as.matrix(GiniClin_CTA_ori_2[,c(5,37:126)]), type = "spearman")

write.csv(CTA_corr_2$r, file = "~/Project/TCR/CTA/ctaCor_list2.csv")
write.csv(CTA_corr_2$P, file = "~/Project/TCR/CTA/ctaPvalue_list2.csv")

<!-- rnb-source-end -->

<!-- rnb-output-begin eyJkYXRhIjoiRXJyb3I6IGF0dGVtcHQgdG8gdXNlIHplcm8tbGVuZ3RoIHZhcmlhYmxlIG5hbWVcbiJ9 -->

Error: attempt to use zero-length variable name




<!-- rnb-output-end -->

<!-- rnb-chunk-end -->


<!-- rnb-text-begin -->



<!-- rnb-text-end -->


<!-- rnb-chunk-begin -->


<!-- rnb-source-begin eyJkYXRhIjoiYGBgclxuQ1RBX2NvcnJUXzIgPC0gcmVhZF9leGNlbChcIn4vUHJvamVjdC9UQ1IvQ1RBL0NUQS54bHN4XCIsIHNoZWV0ID0gXCJHZW5lTGlzdDJcIilcbkNUQV9jb3JyVF8yIDwtIGFzLmRhdGEuZnJhbWUoQ1RBX2NvcnJUXzIpXG5cbkNUQV9jb3JyVF8yWyxcInBhZGpcIl0gPC0gcC5hZGp1c3QoQ1RBX2NvcnJUXzIkUHZhbHVlLG1ldGhvZCA9IFwiQkhcIilcbmBgYCJ9 -->

```r
CTA_corrT_2 <- read_excel("~/Project/TCR/CTA/CTA.xlsx", sheet = "GeneList2")
CTA_corrT_2 <- as.data.frame(CTA_corrT_2)

CTA_corrT_2[,"padj"] <- p.adjust(CTA_corrT_2$Pvalue,method = "BH")

Vocanol plot

ggplot(data = diffGene, aes(x = log2FoldChange, y = -log10(padj), color = sign)) + 
  geom_point(size = 1) +  #Create scatter plot
  scale_color_manual(values = c('red', 'gray', 'green'), limits = c('UP', 'none', 'DOWN')) +  #Set the color
  labs(x = 'log2 Fold Change', y = '-log10 adjust p-value', title = 'High clonality vs Low clonality', color = '') +  #Set the labels
  theme(plot.title = element_text(hjust = 0.5, size = 14), panel.grid = element_blank(), #Change the them and grid lines
  panel.background = element_rect(color = 'black', fill = 'transparent'), 
  legend.key = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = 'black') +  #Add threshold line
  geom_hline(yintercept = 2, lty = 3, color = 'black') +
  xlim(-12, 12) + ylim(0, 35) + #Set the curve boundary
  geom_text_repel(
    data = diffGene,
    aes(label = label),
    size = 3, segment.color = "blue", show.legned = FALSE,
    point.padding = unit(0.8, "lines")
  )

ISS plot

library(dplyr)

ISS_dot_data <- ISS_dot_data %>%
  group_by(Sample) %>%
  mutate(Normalized_Clone_fraction = scale(Clone_fraction),  # Normalizing clone fraction
         Normalized_Clone_mean = scale(Clone_mean)) %>%  # Normalizing clone mean
  ungroup()
ggplot(ISS_dot_data, aes(Clone, EPCAM_CDH1_anno)) +
  geom_point(aes(size=Normalized_Clone_fraction, color = Normalized_Clone_mean))+
  facet_wrap(~ Sample, scales = "free_x", drop = TRUE) +
  theme_bw()+
  theme(panel.grid.major = element_blank(),  # Remove major grid lines
        panel.grid.minor = element_blank()) + # Remove minor grid lines
  scale_size(range = c(1, 10)) +
  scale_color_gradientn(colors = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Bubble plot grouped by sample", x = "Clone", y = "EPCAM_CDH1 annotation") +
  theme(axis.text.x = element_text(size = 10, colour = "black", angle = 45), axis.title = element_text(size = 10),plot.title = element_text(size=15))

ggplot(ISS_dot_data_L766, aes(Clone, EPCAM_CDH1_anno)) +
  geom_point(aes(size=Clone_fraction, color = Clone_mean))+
  theme_bw()+
  theme(panel.grid.major = element_blank(),  # Remove major grid lines
        panel.grid.minor = element_blank()) + # Remove minor grid lines
  scale_color_gradientn(colors = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Bubble plot grouped by sample", x = "Clone", y = "EPCAM_CDH1 annotation") +
  theme(axis.text.x = element_text(size = 10, colour = "black", angle = 45), axis.title = element_text(size = 10),plot.title = element_text(size=15))

---
title: "R Notebook"
output: html_notebook
---

```{r}
library(readxl)
library(clusterProfiler)

library(GOplot)
library(DOSE)

library(topGO)
library(circlize)
library("pheatmap")

library("ggsci")
library("ggplot2")
library("gridExtra")

library("survival")
library("survminer")

library(ggpubr)
library(rstpm2)
library(corrplot)
library(Hmisc)

library(cowplot)
library(reshape)

library(ggbreak)
library(ggrepel)
```


```{r}
GiniClin <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "Gini_Clinic")

GiniClin <- as.data.frame(GiniClin, row.names = GiniClin$ID)

```

# Figure 1

```{r}
f1a <- ggplot(GiniClin, aes(x=GiniIndex)) +
  geom_histogram(colour = "black", fill="#4dbbd5ff", bins = 50) +
  theme_classic() +
  theme(axis.text = element_text(size = 20)) +
  labs(title = "Gini_distribution", y = "Patient count") +
  scale_x_continuous(expand = c(0,0), limits = c(0, 0.8)) + scale_y_continuous(expand = c(0,0), limits = c(0,25))

f1a
```

```{r}
qqnorm(GiniClin_tumor$GiniIndex, ylab = 'GiniIndex')
qqline(GiniClin_tumor$GiniIndex) # 增加趋势直线，利于比较
```

```{r}
# H0 is that the data fit normal distribution
shapiro.test(GiniClin_tumor$GiniIndex)
```



```{r}
GiniClin_AcSq <- GiniClin[GiniClin$histology_Hans_B_WHO_2015 %in% c("AC","SqCC"), ]

f1a_2 <- ggplot(GiniClin_AcSq, aes(x=GiniIndex, fill = histology_Hans_B_WHO_2015)) +
  geom_histogram(color = "black", position = "identity", bins = 50, size = 0.3) +
  theme_classic() +
  theme(axis.text = element_text(size = 20, colour = "black"), legend.position = c(0.8,0.8)) +
  labs(title = "Gini_distribution", fill = "Histology") +
  scale_fill_manual(values = alpha(c("#F8766D", "#00BFC4"), 0.75)) +
  scale_x_continuous(expand = c(0,0), limits = c(0,0.85)) + scale_y_continuous(expand = c(0,0), limits = c(0,17), breaks = c(seq(0,15,5)))

f1a_2
```


```{r}
GiniClin_tumor <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "Gini_Clinic_tumor")

GiniClin_tumor <- as.data.frame(GiniClin_tumor, row.names = GiniClin_tumor$ID)
```




## Box plot
```{r}
# Age group
GiniClin_tumor[GiniClin_tumor$age >= 70 ,"ageGroup"] <- "Above 70"
GiniClin_tumor[GiniClin_tumor$age < 70 ,"ageGroup"] <- "Under 70"

# f1b_1 <- ggplot(GiniClin_tumor, aes(x = ageGroup, y = GiniIndex, fill=ageGroup)) +

f1b_1 <- ggplot(GiniClin_tumor, aes(x = ageGroup, y = GiniIndex, fill=ageGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "Age", x = "Age") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_1
```

```{r}
GiniClin_tumor[GiniClin_tumor$`stadium numbers edt 8 (PMI)` >= 5 ,"stageGroup"] <- "High stage"
GiniClin_tumor[GiniClin_tumor$`stadium numbers edt 8 (PMI)` < 5 ,"stageGroup"] <- "Low stage"

f1b_2 <- ggplot(GiniClin_tumor, aes(x = stageGroup, y = GiniIndex, fill=stageGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_color_npg() +
  labs(title = "Stage", x = "Stage") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_2
```


```{r}
# histology
f1b_3 <- ggplot(GiniClin_tumor, aes(x = histology_Hans_B_WHO_2015, y = GiniIndex, fill=histology_Hans_B_WHO_2015)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  #coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#999999", "#00BF7D","#00B0F6","#A3A500", "#00BFC4")) +
  labs(title = "Histology", x = "Histology") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_3
```

```{r}
GiniClin_tumor[GiniClin_tumor$gender == "M" ,"gender"] <- "Male"
GiniClin_tumor[GiniClin_tumor$gender == "F" ,"gender"] <- "Female"

f1b_4 <- ggplot(GiniClin_tumor, aes(x = gender, y = GiniIndex, fill=gender)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_fill_npg() +
  labs(title = "Gender", x = "Sex") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_4

```

```{r}
# Smoke
GiniClin_tumor[GiniClin_tumor$smoke == "N" ,"smoke"] <- "Never smoke"
GiniClin_tumor[GiniClin_tumor$smoke == "C" ,"smoke"] <- "Current smoke"
GiniClin_tumor[GiniClin_tumor$smoke == "F" ,"smoke"] <- "Former smoke"

f1b_5 <- ggplot(GiniClin_tumor, aes(x = smoke, y = GiniIndex, fill=smoke)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4", "#999999")) +
  labs(title = "Smoke", x = "Smoke") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f1b_5
```

```{r}
# Smoke
GiniClin_tumor[GiniClin_tumor$smoke == "Never smoke" ,"smoke_2"] <- "Non smoker"
GiniClin_tumor[GiniClin_tumor$smoke == "Current smoke" | GiniClin_tumor$smoke == "Former smoke","smoke_2"] <- " Smoker"

f1b_5_2 <- ggplot(GiniClin_tumor, aes(x = smoke_2, y = GiniIndex, fill=smoke_2)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "Smoke", x = "Smoke") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)

f1b_5_2
```

```{r}
# WHO PS
GiniClin_tumor[GiniClin_tumor$WHO_PS == 0 ,"WHO_PS_group"] <- 0
GiniClin_tumor[GiniClin_tumor$WHO_PS > 0 ,"WHO_PS_group"] <- "> 0"

f1b_6 <- ggplot(GiniClin_tumor, aes(x = WHO_PS_group, y = GiniIndex, fill=WHO_PS_group)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "WHO performance status", x = "WHO performance rank") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)

f1b_6
```

### AC/SqCC
```{r}
ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ], aes(x = stageGroup, y = GiniIndex, fill=stageGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_color_npg() +
  labs(title = "Stage", x = "Stage") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)
```

```{r}
ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ], aes(x = stageGroup, y = GiniIndex, fill=stageGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  # scale_color_npg() +
  labs(title = "Stage", x = "Stage") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)
```

```{r}
ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ], aes(x = WHO_PS_group, y = GiniIndex, fill=WHO_PS_group)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "WHO performance status", x = "WHO performance rank") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
```

```{r}
ggplot(GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ], aes(x = WHO_PS_group, y = GiniIndex, fill=WHO_PS_group)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#F8766D", "#00BFC4")) +
  labs(title = "WHO performance status", x = "WHO performance rank") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
```




# Figure 2
```{r}
library("survival")
library("survminer")
```

## Figure2A: KM plot

### All patient


```{r}
GiniClin_tumor[GiniClin_tumor$GiniIndex >= 0.25431, "giniGroup"] <- "Above median"
GiniClin_tumor[GiniClin_tumor$GiniIndex < 0.25431, "giniGroup"] <- "Under median"
GiniClin_tumor$giniGroup = factor(GiniClin_tumor$giniGroup, levels = c("Above median", "Under median"))
GiniClin_tumor['L481T', 'status_5_years_trunk_20190329'] = 1
GiniClin_tumor2 <- GiniClin_tumor[!GiniClin_tumor$ID == 'L584T',]
```

```{r}
# construct the object
fig2test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor)

print(fig2test)

summary(fig2test)$table

```

```{r}
fig2a_all <- ggsurvplot(fig2test,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())


fig2a_all
```

```{r}
fig2a_continue.cox <- coxph(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ GiniIndex, data = GiniClin_tumor)
fig2a_continue.cox
```

```{r}
ggsurvplot(survfit(fig2a_continue.cox, data=GiniClin_tumor), color = "#2E9FDF",ggtheme = theme_minimal())
```


#### Time split surv
```{r}
GiniClin_tumor[GiniClin_tumor$giniGroup == "Above median", "giniGroup_nr"] <- 1
GiniClin_tumor[GiniClin_tumor$giniGroup == "Under median", "giniGroup_nr"] <- 0

fig2_timespl <- survSplit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329 == 1) ~ ., data=GiniClin_tumor2, cut=c(2), episode = "tgroup")

fig2_timespl.cox <- coxph(Surv(tstart, OS_years_5_years_trunk_20190329, event==1) ~ giniGroup_nr:strata(tgroup), data=fig2_timespl, ties="efron")
summary(fig2_timespl.cox)

```

```{r}
cox.zph(fig2_timespl.cox)
```

```{r}
fig2_mod_tvc <- stpm2(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329 == 1) ~ giniGroup_nr, data = GiniClin_tumor, df=3, tvc=list(giniGroup_nr=1))

plot(fig2_mod_tvc, newdata = data.frame(giniGroup_nr = 0), type = "hazard",  xlim=c(0.5,5), ylim=c(0,0.2), ylab = "Hazard", xlab = "Time", lwd = 2, ci = FALSE)
plot(fig2_mod_tvc, newdata = data.frame(giniGroup_nr = 1), type = "hazard", line.col=2, ci=FALSE, lwd = 2, add=TRUE)
```

#### Calculate best cutoff
```{r}
fig2.cut <- surv_cutpoint(GiniClin_tumor,
                          time = "OS_years_5_years_trunk_20190329",
                          event = "status_5_years_trunk_20190329",
                          variables = "GiniIndex")
summary(fig2.cut)
```
```{r}
plot(fig2.cut, "GiniIndex", palette = "npg")
```
```{r}
fig2.cut.cat <- surv_categorize(fig2.cut)
fig2.cut.fit <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ GiniIndex, data = fig2.cut.cat)

ggsurvplot(fig2.cut.fit,
           data = fig2.cut.cat,
           risk.table = TRUE,
           pval = T)
```


#### Calculate AC and SqCC

```{r}
# AC
fig2.cut_AC <- surv_cutpoint(GiniClin_tumor_Ac,
                             time = "OS_years_5_years_trunk_20190329",
                             event = "status_5_years_trunk_20190329",
                             variables = "GiniIndex")
summary(fig2.cut_AC)
```
```{r}
plot(fig2.cut_AC, "GiniIndex", palette = "npg")
```
```{r}
fig2.cut_AC.cat <- surv_categorize(fig2.cut_AC)
fig2.cut_AC.fit <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ GiniIndex, data = fig2.cut_AC.cat)

ggsurvplot(fig2.cut_AC.fit,
           data = fig2.cut.cat,
           risk.table = TRUE,
           pval = T)
```


#### KM without censoring
```{r}
fig2_NoCensor_test <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ giniGroup, data = GiniClin_tumor)

print(fig2_NoCensor_test)

summary(fig2_NoCensor_test)$table
```



```{r}
fig2_NoCensor <- ggsurvplot(fig2_NoCensor_test,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 2,
                        ggtheme = theme_classic(),
                        legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv_without_censoring",
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())

fig2_NoCensor
```


```{r}
fig2_NoCensor.cut <- surv_cutpoint(GiniClin_tumor,
                          time = "OS_years_20190329",
                          event = "status_out_20190329",
                          variables = "GiniIndex")
summary(fig2_NoCensor.cut)
```

```{r}
plot(fig2_NoCensor.cut, "GiniIndex", palette = "npg")
```
```{r}
fig2_NoCensor.cut.cat <- surv_categorize(fig2_NoCensor.cut)
fig2_NoCensor.cut.fit <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ GiniIndex, data = fig2_NoCensor.cut.cat)

ggsurvplot(fig2_NoCensor.cut.fit,
           data = fig2_NoCensor.cut.cat,
           risk.table = TRUE,
           pval = T)
```

```{r}
ggsurvplot(fig2_NoCensor.cut.fit,
            pval = TRUE,
            palette = c("#e64b35", "#4dbbd5"),
            break.time.by = 2,
            ggtheme = theme_classic(),
            legend.labs = c("Above median", "Under median"),
            risk.table = "abs_pct",
            risk.table.y.text = FALSE,
            xlab = "Time (years)",
            title = "giniSurv_without_censoring",
            axes.offset = FALSE,
            risk.table.fontsize = 2.5,
            legend = c(0.9,0.9),
            risk.table.title = "Number at risk by time: n (%)",
            font.main = c())
```




### AC patient

```{r}
GiniClin_tumor_Ac = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ]

fig2ACtest <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor_Ac)

print(fig2ACtest)
print("====")
summary(fig2ACtest)$table
```

```{r}
fig2a_AC <- ggsurvplot(fig2ACtest,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        #legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())


fig2a_AC
```

#### without sensoring
```{r}
fig2AC_NoCensor_test <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ giniGroup, data = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", ])

print(fig2AC_NoCensor_test)

summary(fig2AC_NoCensor_test)$table
```

```{r}
ggsurvplot(fig2AC_NoCensor_test,
          pval = TRUE,
          palette = c("#e64b35", "#4dbbd5"),
          break.time.by = 1,
          ggtheme = theme_classic(),
          legend.labs = c("Above median", "Under median"),
          risk.table = "abs_pct",
          risk.table.y.text = FALSE,
          xlab = "Time (years)",
          title = "giniSurv_without_censoring",
          axes.offset = FALSE,
          risk.table.fontsize = 2.5,
          legend = c(0.9,0.9),
          risk.table.title = "Number at risk by time: n (%)",
          font.main = c())
```


### SqCC patient

```{r}
GiniClin_tumor_SqCC = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ]

fig2SqCCtest_2 <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor_SqCC)

print(fig2SqCCtest)
print("====")
summary(fig2SqCCtest)$table
```

```{r}
fig2a_SqCC_2 <- ggsurvplot(fig2SqCCtest_2,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())


fig2a_SqCC_2
```





```{r}
fig2_SqCC_timespl <- survSplit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329 == 1) ~ ., data=GiniClin_tumor_SqCC, cut=2, episode = "tgroup")

fig2_SqCC_timespl.cox <- coxph(Surv(tstart, OS_years_5_years_trunk_20190329, event==1) ~ giniGroup:strata(tgroup), data=fig2_SqCC_timespl, ties="efron")
summary(fig2_SqCC_timespl.cox)
```

#### without sensoring
```{r}
fig2SqCC_NoCensor_test <- survfit(Surv(OS_years_20190329, status_out_20190329) ~ giniGroup, data = GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", ])

print(fig2SqCC_NoCensor_test)

summary(fig2SqCC_NoCensor_test)$table
```

```{r}
ggsurvplot(fig2SqCC_NoCensor_test,
          pval = TRUE,
          palette = c("#e64b35", "#4dbbd5"),
          break.time.by = 1,
          ggtheme = theme_classic(),
          legend.labs = c("Above median", "Under median"),
          risk.table = "abs_pct",
          risk.table.y.text = FALSE,
          xlab = "Time (years)",
          title = "giniSurv_without_censoring",
          axes.offset = FALSE,
          risk.table.fontsize = 2.5,
          legend = c(0.9,0.9),
          risk.table.title = "Number at risk by time: n (%)",
          font.main = c())
```


### Table: Cox regression

Single variate
```{r}
GiniClin_tumor$giniGroup = factor(GiniClin_tumor$giniGroup, levels = c("Under median", "Above median"))

fig2.cox <- coxph(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_tumor)

print("===")
fig2.cox
print("===")
print(summary(fig2.cox))
print("===")
```


```{r}
cox.zph(fig2.cox)
```


```{r}
# Merge the histology group except AC and SqCC to others
GiniClin_tumor[!(GiniClin_tumor$histology_Hans_B_WHO_2015 %in% c("AC","SqCC")), "Histology"] <- "Other"
GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "AC", "Histology"] <- "AC"
GiniClin_tumor[GiniClin_tumor$histology_Hans_B_WHO_2015 == "SqCC", "Histology"] <- "SqCC"
```

```{r}
# Merge the current smoke and former smoke in to one group
GiniClin_tumor[GiniClin_tumor$smoke %in% c("Current smoke","Former smoke"), "smokeGroup"] <- "Current/Former"
GiniClin_tumor[GiniClin_tumor$smoke == "Never smoke", "smokeGroup"] <- "Never"
```


```{r}
# Construct levels of each parameter
GiniClin_tumor$stageGroup = factor(GiniClin_tumor$stageGroup, levels = c("Low stage", "High stage"))
GiniClin_tumor$gender = factor(GiniClin_tumor$gender, levels = c("Male", "Female"))
GiniClin_tumor$Histology = factor(GiniClin_tumor$Histology, levels = c("AC", "SqCC", "Other"))
GiniClin_tumor$ageGroup = factor(GiniClin_tumor$ageGroup, levels = c("Under 70", "Above 70"))
GiniClin_tumor$smokeGroup = factor(GiniClin_tumor$smokeGroup, levels = c("Never","Current/Former"))
```

#### Multi-variate cox regression

```{r}
fig2.multicox <- coxph(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup + ageGroup + gender + smokeGroup + Histology + stageGroup, data = GiniClin_tumor)

summary(fig2.multicox)
```


### Without adjuvent treatment
```{r}
# Subset the group without adjuvent treatment
GiniClin_noTreat = subset(GiniClin_tumor, Annette_adjuvant == 0)

GiniClin_noTreat = as.data.frame(GiniClin_noTreat)

GiniClin_noTreat$giniGroup = factor(GiniClin_noTreat$giniGroup, levels = c("Above median", "Under median"))

# construct the object
fig8test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_noTreat)

print(fig8test)

summary(fig8test)$table
```


```{r}
fig8a_all <- ggsurvplot(fig8test,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        #legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv_no_treatment",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())


fig8a_all
```

```{r}
# Subset the group with adjuvent treatment
GiniClin_Treat = subset(GiniClin_tumor, Annette_adjuvant == 1)

GiniClin_Treat = as.data.frame(GiniClin_Treat)

GiniClin_Treat$giniGroup = factor(GiniClin_Treat$giniGroup, levels = c("Above median", "Under median"))

# construct the object
fig8test_2 <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ giniGroup, data = GiniClin_Treat)

print(fig8test_2)

summary(fig8test_2)$table
```


```{r}
fig8b_all_2 <- ggsurvplot(fig8test_2,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        #legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv_treatment",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())


fig8b_all_2
```
# Figure mutation burden

## Mutation burden

```{r}
mutClin <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "MutationBurdern_all_tumor")

mutClin <- as.data.frame(mutClin, row.names = mutClin$ID)

mutClin$AB_GiniIndex <- as.numeric(mutClin$AB_GiniIndex)
mutClin$Mutation_load_allmutations <- as.numeric(mutClin$Mutation_load_allmutations)
mutClin <- mutClin[intersect(rownames(mutClin), rownames(GiniClin_tumor)),]
```

```{r}
fig4a <- ggscatter(mutClin[,c("AB_GiniIndex","Mutation_load_allmutations")], x = "AB_GiniIndex", y = "Mutation_load_allmutations",
                   add="reg.line", add.params = list(color = "#00bfc4"),
                   conf.int = FALSE,
                   cor.coef = TRUE,
                   cor.coeff.args = list(method = "spearman", label.x.npc = 0.15, label.y.npc = 0.85),
                   ggtheme = theme_classic(),
                   #add = "reg.line", add.params = c(color = '#00bfc4'),
                   title = "Mutation burden", xlab = "Gini Index", ylab = "Mutation load of all mutations",
                   font.tickslab = c(size=15, color = "black"), font.main = 20,font.y = 15, font.x = 15) +
  scale_y_continuous(expand = c(0,0), limits = c(0, 130), breaks = c(seq(0,120,30))) +
  scale_x_continuous(expand = c(0,0), limits = c(0, 0.75), breaks = c(seq(0,0.6,0.2)))

fig4a
```

## Mutation

```{r}
mutGini <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "mutation_tumor")

mutGini <- as.data.frame(mutGini, row.names = mutGini$ID)
mutGini$ID <- NULL

mutGini <- mutGini[intersect(rownames(mutGini), rownames(GiniClin_tumor)),]
mutGini$CCND1 <- NULL

mutTest <- list()
for (mutGene in colnames(mutGini[,(3:83)])){
  WT_buff <- mutGini[mutGini[,mutGene] == "WT","GiniIndex"]
  Mut_buff <- mutGini[mutGini[,mutGene] == "Mutated","GiniIndex"]
  mutRes <- wilcox.test(WT_buff,Mut_buff)
  mutTest <- c(mutTest,c(mutGene,mutRes$statistic,mutRes$p.value))
}

mutTest.matrix <- matrix(mutTest, ncol = 3, byrow = TRUE)
colnames(mutTest.matrix) <- c("Mutation", "wil.cox", "p_value")
mutTest.matrix <- as.data.frame(mutTest.matrix)
mutTest.matrix[,"FDR"] <- p.adjust(mutTest.matrix$p_value, method = "fdr")

```

### EGFR
```{r}
f4c.egfr <- ggplot(mutGini, aes(x = EGFR, y = GiniIndex, fill = EGFR)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2,color ="black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "EGFR", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.egfr
```

### APC
```{r}
f4c.apc <- ggplot(mutGini, aes(x = APC, y = GiniIndex, fill=APC)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "APC", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.apc
```

### TP53
```{r}
f4c.tp53 <- ggplot(mutGini, aes(x = TP53, y = GiniIndex, fill=TP53)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "TP53", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.tp53
```
### ARID1A
```{r}
f4c.arid1a <- ggplot(mutGini, aes(x = ARID1A, y = GiniIndex, fill=ARID1A)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "ARID1A", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.arid1a
```

### EPHB6
```{r}
f4c.ephb6 <- ggplot(mutGini, aes(x = EPHB6, y = GiniIndex, fill=EPHB6)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "EPHB6", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = FALSE)
f4c.ephb6
```

### CSMD3
```{r}
f4c.csmd3 <- ggplot(mutGini, aes(x = CSMD3, y = GiniIndex, fill=CSMD3)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
  theme_classic() +
  # coord_flip() +
  scale_fill_manual(values = c("#FFE5D4", "#E6EBE0")) +
  labs(title = "CSMD3", x = NULL) +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20, hjust = 0.5)) +
  #, panel.border = element_rect(color = "black", size = 1, fill = NA)
  stat_compare_means(paired = FALSE)
f4c.csmd3
```

### Combine
```{r}
ggarrange(f4c.egfr, f4c.apc, f4c.tp53, f4c.arid1a, f4c.ephb6, f4c.csmd3, ncol = 2, nrow = 3, align = "hv")
```



# Figure IHC Heatmap

## prepare table
```{r}
mut_t_anno <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "mutation_tumor_anno")

mut_t_anno <- as.data.frame(mut_t_anno, row.names = mut_t_anno$ID)

mut_t_anno$ID <- NULL

ihcAnno <- merge(AB_Gini_anno, mut_t_anno, by = "row.names", all =TRUE)
ihcAnno <- as.data.frame(ihcAnno, row.names = ihcAnno$Row.names)
ihcAnno$Row.names <- NULL
ihcAnno <- merge(ihcAnno, mutGini[,c("APC","ARID1A","EPHB6")],by = "row.names", all=FALSE)

ihcAnno <- merge(ihcAnno, GiniClin[,c("histology_Hans_B_WHO_2015", "gender", "smoke", "status_5_years_trunk_20190329")], by = "row.names",all =TRUE)

ihcAnno <- rename(ihcAnno, c("histology_Hans_B_WHO_2015"="histology", "status_5_years_trunk_20190329"="surv5years"))

ihcAnno <- as.data.frame(ihcAnno, row.names = ihcAnno$Row.names)
ihcAnno$Row.names <- NULL

ihcAnno <- ihcAnno[,c(1:20, 25:27, 21:24)]

ihcHeatT_tumor_norm <- as.data.frame(ihcHeatT_tumor_norm, row.names = ihcHeatT_tumor_norm$ID)
ihcHeatT_tumor_norm$ID <- NULL
ihcHeatT_tumor_norm <- as.data.frame(ihcHeatT_tumor_norm)

ihcHeatT_tumor_norm <- rename(ihcHeatT_tumor_norm, c("Gini Index" = "Gini.Index"))

ihcAnno[!is.na(ihcAnno$smoke) & ihcAnno$smoke == "N", "smoke"] <- "Never"
ihcAnno[!is.na(ihcAnno$smoke) & ihcAnno$smoke == "F", "smoke"] <- "Former"
ihcAnno[!is.na(ihcAnno$smoke) & ihcAnno$smoke == "C", "smoke"] <- "Current"

ihcAnno[!is.na(ihcAnno$gender) & ihcAnno$gender == "F", "gender"] <- "Female"
ihcAnno[!is.na(ihcAnno$gender) & ihcAnno$gender == "M", "gender"] <- "Male"

ihcAnno[!is.na(ihcAnno$surv5years) & ihcAnno$surv5years == 1, "surv5years"] <- "Dead"
ihcAnno[!is.na(ihcAnno$surv5years) & ihcAnno$surv5years == 0, "surv5years"] <- "Alive"

ihcHeatT_tumor_norm2 <- ihcHeatT_tumor_norm[!rownames(ihcHeatT_tumor_norm) %in% c("L483T", "L731T"), ]

ihcHeatT_tumor_norm3 <- ihcHeatT_tumor_norm2[,grep("CD44", colnames(ihcHeatT_tumor_norm2), invert = TRUE)]

```

```{r}
ihcAnno_color = list(
  "Gini index" = colorRampPalette(brewer.pal(n = 9, name = "Reds"))(100),
  "KRAS" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "EGFR" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "PIK3CA" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "FGFR2"= c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "PDGFRA"= c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "ROS1"= c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "NF1" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "STK11" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "TP53" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "KEAP1" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "NFE2L2" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "MUC16" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "KMT2C" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "KMT2D" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "SMARCA4" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "CDKN2A" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "CREBBP" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "CSMD3" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "LRP1B" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "APC" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "ARID1A" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "EPHB6" = c(Mutated = "#D45F51", WT = "#B7D2E8"),
  "histology" = c(AC = "#D55740", SqCC = "#4DBCD6", AdSq = "#479E88", LCC = "#415384", LCNEC = "#F49B7F", SC = "#F8BED6"),
  "gender" = c(Male = "#B9E3DF", Female = "#EFC0D5"),
  "smoke" = c(Current = "#D45F51", Former = "#E68840", Never = "#BCD9B2"),
  "surv5years" = c(Dead = "#D45F51", Alive = "#B7D2E8")
)
```

```{r}
fig5b <- pheatmap(t(ihcHeatT_tumor_norm2), clustering_method = "ward.D2",
                  clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
                  cutree_cols = 3,
                  #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
                  annotation_col = ihcAnno,
                  annotation_colors = ihcAnno_color,
                  fontsize = 5,
                  show_colnames = F
                  )
fig5b
```

```{r}
fig5b.cluster <- fig5b$tree_col

fig5b.cluster.plot <- plot(fig5b.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig5b.cut <- cutree(fig5b.cluster, 3)

fig5b.cut_1 <- names(fig5b.cut)[fig5b.cut == 1]
fig5b.cut_2 <- names(fig5b.cut)[fig5b.cut == 2]
fig5b.cut_3 <- names(fig5b.cut)[fig5b.cut == 3]

fig5b.cut_gini <- merge(AB_Gini_anno, fig5b.cut, by = "row.names", all.y = TRUE)

fig5b.cut_gini <- as.data.frame(fig5b.cut_gini, row.names = fig5b.cut_gini$Row.names)

fig5b.cut_gini$Row.names <- NULL

fig5b.cut_gini <- rename(fig5b.cut_gini, c("y"="Cluster"))
```

```{r}
fig5b.cut_gini[fig5b.cut_gini$Cluster == 2, "Class"] <- "Immune hot"
fig5b.cut_gini[fig5b.cut_gini$Cluster == 3, "Class"] <- "Median"
fig5b.cut_gini[fig5b.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5b.cut_gini$Class = factor(fig5b.cut_gini$Class, levels = c("Immune hot", "Median", "Immune cold"))
```


```{r}
fig5b.cut_gini <- as.data.frame(fig5b.cut_gini)
fig5b.cut_gini2 <- rename(fig5b.cut_gini, c("Gini index"="GiniIndex"))
compare_means(GiniIndex ~ Class, data = fig5b.cut_gini2)
```


```{r}
fig5b.cut_km <- merge(fig5b.cut_gini, GiniClin_tumor[, c("OS_years_5_years_trunk_20190329", "status_5_years_trunk_20190329")], by = "row.names")
fig5b.cut_km <- as.data.frame(fig5b.cut_km, row.names = fig5b.cut_km$Row.names)
fig5b.cut_km$Row.names <- NULL

fig5b.test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ Class, data = fig5b.cut_km)

summary(fig5b.test)$table
```

```{r}
fig5b.KM <- ggsurvplot(fig5b.test,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5", "#999999"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        #legend.labs = c("Above median", "Under median"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (years)",
                        title = "giniSurv_Class",
                        xlim = c(0,5.2),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.9,0.9),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c())
fig5b.KM
```

```{r}
fig5c <- ggplot(fig5b.cut_gini, aes(x = Class, y = `Gini index`, fill=Class)) +
    geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
    theme_classic() +
    # coord_flip() +
    scale_fill_manual(values = c("#e64b35", "#4dbbd5", "#999999")) +
    labs(title = "Class", x = "Class") +
    theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
    stat_compare_means(paired = NULL)
fig5c
```


```{r}
fig5b.cut_clin <- merge(fig5b.cut_km, GiniClin_tumor[,c("histology_Hans_B_WHO_2015", "gender", "smoke")], by = "row.names", all.x = TRUE)

fig5b.cut_clin <- as.data.frame(fig5b.cut_clin, row.names = fig5b.cut_clin$Row.names)

fig5b.cut_clin$Row.names <- NULL

fig5b.cut_clin_AC <- fig5b.cut_clin[fig5b.cut_clin$histology_Hans_B_WHO_2015 == "AC", ]
fig5b.cut_clin_SqCC <- fig5b.cut_clin[fig5b.cut_clin$histology_Hans_B_WHO_2015 == "SqCC", ]

fig5b.cut_clin

fig5b.cut_clin[fig5b.cut_clin$`Gini index` >= 0.25431, "giniGroup"] <- "Above median"
fig5b.cut_clin[fig5b.cut_clin$`Gini index` < 0.25431, "giniGroup"] <- "Under median"
fig5b.cut_clin$giniGroup = factor(fig5b.cut_clin$giniGroup, levels = c("Above median", "Under median"))
```

## cluster without gini index

```{r}
pheatmap(t(ihcHeatT_tumor_norm2[,(2:25)]), clustering_method = "ward.D2",
                  clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
                  cutree_cols = 3,
                  #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
                  annotation_col = ihcAnno,
                  annotation_colors = ihcAnno_color,
                  fontsize = 5,
                  show_colnames = F
                  )

pheatmap(t(ihcHeatT_tumor_norm3[,grep("_S", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 3,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
pheatmap(t(ihcHeatT_tumor_norm3[,grep("_S", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 3,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
pheatmap(t(ihcHeatT_tumor_norm3[,grep("_T", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 2,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
```

```{r}
fig5d_T <- pheatmap(t(ihcHeatT_tumor_norm3[,grep("_T", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
          clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
          cutree_cols = 2,
          #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F
)
fig5d_T
```

```{r}
fig5d_T.cluster <- fig5d_T$tree_col

fig5d_T.cluster.plot <- plot(fig5d_T.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig5d_T.cut <- cutree(fig5d_T.cluster, 2)

fig5d_T.cut_1 <- names(fig5d_T.cut)[fig5d_T.cut == 1]
fig5d_T.cut_2 <- names(fig5d_T.cut)[fig5d_T.cut == 2]

fig5d_T.cut_gini <- merge(AB_Gini_anno, fig5d_T.cut, by = "row.names", all.y = TRUE)

fig5d_T.cut_gini <- as.data.frame(fig5d_T.cut_gini, row.names = fig5d_T.cut_gini$Row.names)

fig5d_T.cut_gini$Row.names <- NULL

fig5d_T.cut_gini <- rename(fig5d_T.cut_gini, c("Cluster"="y"))
```

```{r}
fig5d_T.cut_gini[fig5d_T.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5d_T.cut_gini[fig5d_T.cut_gini$Cluster == 2, "Class"] <- "Immune hot"
fig5d_T.cut_gini$Class = factor(fig5d_T.cut_gini$Class, levels = c("Immune hot", "Immune cold"))
```

```{r}
fig5d_T.cut_gini <- as.data.frame(fig5d_T.cut_gini)
fig5d_T.cut_gini <- rename(fig5d_T.cut_gini, c("GiniIndex"="Gini index"))
compare_means(GiniIndex ~ Class, data = fig5d_T.cut_gini)
```

```{r}
fig5d_T.box <- ggplot(fig5d_T.cut_gini, aes(x = Class, y = GiniIndex, fill=Class)) +
    geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
    theme_classic() +
    # coord_flip() +
    scale_fill_manual(values = c("#e64b35", "#4dbbd5")) +
    labs(title = "Class", x = "Class") +
    theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
    stat_compare_means(paired = NULL)

fig5d_T.box
```

```{r}
fig5d_T.cut_km <- merge(fig5d_T.cut_gini, GiniClin_tumor[, c("OS_years_5_years_trunk_20190329", "status_5_years_trunk_20190329")], by = "row.names")
fig5d_T.cut_km <- as.data.frame(fig5d_T.cut_km, row.names = fig5d_T.cut_km$Row.names)
fig5d_T.cut_km$Row.names <- NULL

fig5d_T.test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ Class, data = fig5d_T.cut_km)

summary(fig5d_T.test)$table
```

```{r}
ggsurvplot(fig5d_T.test,
         pval = TRUE,
         palette = c("#e64b35", "#4dbbd5", "#999999"),
         break.time.by = 1,
         ggtheme = theme_classic(),
         #legend.labs = c("Above median", "Under median"),
         risk.table = "abs_pct",
         risk.table.y.text = FALSE,
         xlab = "Time (years)",
         title = "giniSurv_Class",
         xlim = c(0,5.2),
         axes.offset = FALSE,
         risk.table.fontsize = 2.5,
         legend = c(0.9,0.9),
         risk.table.title = "Number at risk by time: n (%)",
         font.main = c())
```


```{r}
fig5d_S <- pheatmap(t(ihcHeatT_tumor_norm3[,grep("_S", colnames(ihcHeatT_tumor_norm3))]), clustering_method = "ward.D2",
        clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
        cutree_cols = 3,
        #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
        annotation_col = ihcAnno,
        annotation_colors = ihcAnno_color,
        fontsize = 5,
        show_colnames = F
)
fig5d_S
```

```{r}
fig5d_S.cluster <- fig5d_S$tree_col

fig5d_S.cluster.plot <- plot(fig5d_S.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig5d_S.cut <- cutree(fig5d_S.cluster, 3)

fig5d_S.cut_1 <- names(fig5d_S.cut)[fig5d_S.cut == 1]
fig5d_S.cut_2 <- names(fig5d_S.cut)[fig5d_S.cut == 2]
fig5d_S.cut_3 <- names(fig5d_S.cut)[fig5d_S.cut == 3]

fig5d_S.cut_gini <- merge(AB_Gini_anno, fig5d_S.cut, by = "row.names", all.y = TRUE)

fig5d_S.cut_gini <- as.data.frame(fig5d_S.cut_gini, row.names = fig5d_S.cut_gini$Row.names)

fig5d_S.cut_gini$Row.names <- NULL

fig5d_S.cut_gini <- rename(fig5d_S.cut_gini, c("Cluster"="y"))
```

```{r}
fig5d_S.cut_gini[fig5d_S.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5d_S.cut_gini[fig5d_S.cut_gini$Cluster == 2, "Class"] <- "Median"
fig5d_S.cut_gini[fig5d_S.cut_gini$Cluster == 3, "Class"] <- "Immune hot"
fig5d_S.cut_gini$Class = factor(fig5d_S.cut_gini$Class, levels = c("Immune hot", "Median", "Immune cold"))
```

```{r}
fig5d_S.cut_gini <- as.data.frame(fig5d_S.cut_gini)
fig5d_S.cut_gini <- rename(fig5d_S.cut_gini, c("GiniIndex"="Gini index"))
compare_means(GiniIndex ~ Class, data = fig5d_S.cut_gini)
```

```{r}
fig5d_S.box <- ggplot(fig5d_S.cut_gini, aes(x = Class, y = GiniIndex, fill=Class)) +
    geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
    theme_classic() +
    # coord_flip() +
    scale_fill_manual(values = c("#e64b35", "#4dbbd5", "#999999")) +
    labs(title = "Class", x = "Class") +
    theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
    stat_compare_means(paired = NULL)
```

```{r}
fig5d_S.cut_km <- merge(fig5d_S.cut_gini, GiniClin_tumor[, c("OS_years_5_years_trunk_20190329", "status_5_years_trunk_20190329")], by = "row.names")
fig5d_S.cut_km <- as.data.frame(fig5d_S.cut_km, row.names = fig5d_S.cut_km$Row.names)
fig5d_S.cut_km$Row.names <- NULL

fig5d_S.test <- survfit(Surv(OS_years_5_years_trunk_20190329, status_5_years_trunk_20190329) ~ Class, data = fig5d_S.cut_km)

summary(fig5d_S.test)$table
```

```{r}
ggsurvplot(fig5d_S.test,
         pval = TRUE,
         palette = c("#e64b35", "#4dbbd5", "#999999"),
         break.time.by = 1,
         ggtheme = theme_classic(),
         #legend.labs = c("Above median", "Under median"),
         risk.table = "abs_pct",
         risk.table.y.text = FALSE,
         xlab = "Time (years)",
         title = "giniSurv_Class",
         xlim = c(0,5.2),
         axes.offset = FALSE,
         risk.table.fontsize = 2.5,
         legend = c(0.9,0.9),
         risk.table.title = "Number at risk by time: n (%)",
         font.main = c())
```


```{r}
ihcHeatT_tumor_norm4 <- ihcHeatT.norm3[rownames(ihcHeatT_tumor_norm2),grep("SAndTM", colnames(ihcHeatT.norm3))]

ihcHeatT_tumor_norm4 <- merge(AB_Gini_anno, ihcHeatT_tumor_norm4, by="row.names", all=FALSE)

ihcHeatT_tumor_norm4 <- as.data.frame(ihcHeatT_tumor_norm4, row.names = ihcHeatT_tumor_norm4$Row.names)
ihcHeatT_tumor_norm4$Row.names <- NULL

ihcHeatT_tumor_norm4 <- rename(ihcHeatT_tumor_norm4, c("GiniIndex"="Gini index"))

ihcHeatT_tumor_norm4 <- ihcHeatT_tumor_norm4 %>% rename_with(~ gsub("SQ1", "_",.x))

ihcHeatT_tumor_norm4 <- ihcHeatT_tumor_norm4[,grep("CD44", colnames(ihcHeatT_tumor_norm4), invert = TRUE)]
```

```{r}
fig5d_ST <- pheatmap(t(ihcHeatT_tumor_norm4[,c(2:12)]), clustering_method = "ward.D2",
        clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
        cutree_cols = 2,
        #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
        annotation_col = ihcAnno,
        annotation_colors = ihcAnno_color,
        fontsize = 5,
        show_colnames = F)

fig5d_ST
```

```{r}
fig5d_ST.cluster <- fig5d_ST$tree_col

fig5d_ST.cluster.plot <- plot(fig5d_ST.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

#plot(fig5d_ST.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig5d_ST.cut <- cutree(fig5d_ST.cluster, 2)

fig5d_ST.cut_1 <- names(fig5d_ST.cut)[fig5d_ST.cut == 1]
fig5d_ST.cut_2 <- names(fig5d_ST.cut)[fig5d_ST.cut == 2]

fig5d_ST.cut_gini <- merge(AB_Gini_anno, fig5d_ST.cut, by = "row.names", all.y = TRUE)

fig5d_ST.cut_gini <- as.data.frame(fig5d_ST.cut_gini, row.names = fig5d_ST.cut_gini$Row.names)

fig5d_ST.cut_gini$Row.names <- NULL

fig5d_ST.cut_gini <- rename(fig5d_ST.cut_gini, c("Cluster"="y"))
```

```{r}
fig5d_ST.cut_gini[fig5d_ST.cut_gini$Cluster == 1, "Class"] <- "Immune cold"
fig5d_ST.cut_gini[fig5d_ST.cut_gini$Cluster == 2, "Class"] <- "Immune hot"
fig5d_ST.cut_gini$Class = factor(fig5d_ST.cut_gini$Class, levels = c("Immune hot", "Immune cold"))
```

```{r}
fig5d_ST.cut_gini <- as.data.frame(fig5d_ST.cut_gini)
fig5d_ST.cut_gini <- rename(fig5d_ST.cut_gini, c("GiniIndex"="Gini index"))
compare_means(GiniIndex ~ Class, data = fig5d_ST.cut_gini)
```

```{r}
fig5d_ST.box <- ggplot(fig5d_ST.cut_gini, aes(x = Class, y = GiniIndex, fill=Class)) +
    geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black") +
    theme_classic() +
    # coord_flip() +
    scale_fill_manual(values = c("#e64b35", "#4dbbd5", "#999999")) +
    labs(title = "Class", x = "Class") +
    theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
    stat_compare_means(paired = NULL)
fig5d_ST.box
```
## Ordered by Gini
```{r}
pheatmap(t(ihcHeatT_tumor_norm4[order(ihcHeatT_tumor_norm4$GiniIndex),c(2:12)]),
         #clustering_method = "ward.D2",
         cluster_rows = F, cluster_cols = F,
         cutree_cols = 2,
         #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 8,
         show_colnames = F)
```

# IHC correlation plot

```{r}
fig6 <- corrplot(data.matrix(ihcCorR), method = "circle",col = rev(COL2('RdBu', 200)),
                 p.mat = data.matrix(ihcCorP), insig = "label_sig", sig.level = c(.001, .01, .05), pch.cex = 0.8,
                 tl.col = "black")
fig6
```

```{r}
ihcCorP_melt <- as.data.frame(ihcCorP_melt)
ihcCorP_melt$var1 <- factor(ihcCorP_melt$var1, levels = c("CD3","CD4","CD8","CD20","CD44","CD45RO","CD138","CD163","FOXP3","NKp46","PD1","PDL1"))
ihcCorP_melt$var2 <- factor(ihcCorP_melt$var2, levels = c("SAndT","Tumor", "Stroma"))

fig6b <- ggplot(ihcCorP_melt, aes(var1, var2, fill = Corr)) +
  geom_point(shape=21, colour = "black", size = 12)+
  theme_classic()+
  scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Correlation", x = "IHC marker", y = "Position") +
  theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
  geom_text(aes(label = label), size = 5, colour = "black")

fig6b
```


# Figure 9 Lymphotrack cluster plot

## L481
```{r}
L481_L <- as.data.frame(L481_L)

L481_R <- as.data.frame(L481_R)

L481_all <- as.data.frame(L481_all)

L481_L["Class"] <- "Lymphotrack"

L481_R["Class"] <- "RNAseq"
```

```{r}
npj.pal = pal_npg("nrc", alpha = 0.9)(10)
simp.pal = pal_simpsons("springfield")(16)
nejm.pal = pal_nejm("default", alpha = 0.9)(8)
jama.pal = pal_jama("default",alpha = 0.9)(7)
fig9_pal = c(c("#E0E3E3FF"), npj.pal, simp.pal, nejm.pal, jama.pal)

fig9_L481 <- ggplot(L481_all, aes(x= Type, y= Fraction, fill= Clone)) +
  geom_col(position = "stack", width = 0.6) +
  theme_classic() +
  scale_y_continuous(expand = c(0,0)) +
  scale_fill_manual(values = fig9_pal) +
  scale_y_break(c(40,95), scales = 0.2, ticklabels = c(95,100))


gg.gap(plot = fig9_L481,
       segments = c(40,80),
       ylim = c(0, 100),
       tick_width = c(5, 10),
       rel_heights = c(0.3, 0, 0.1)
       )

ggplot(L481_L, aes(x=Class, y= Fraction, fill= reorder(Lymphotrack, -Fraction))) +
     geom_col(position = "stack", width = 0.6) +
     theme_classic() +
     scale_fill_manual(values = fig9_pal) +
     scale_y_break(c(7,92), scales = 0.2, ticklabels = c(95,100), expand = FALSE) +
     scale_y_continuous(expand = c(0,0), breaks = c(0,2,4,6))

ggplot(L481_R, aes(x=Class, y= Fraction, fill= reorder(RNAseq, -Fraction))) +
     geom_col(position = "stack", width = 0.6) +
     theme_classic() +
     scale_fill_manual(values = fig9_pal) +
     scale_y_break(c(40,92), scales = 0.2, ticklabels = c(95,100), expand = FALSE) +
     scale_y_continuous(expand = c(0,0), breaks = c(0,10,20,30,40))

```


# Fig10 Vocanol plot

```{r}
diffGene <- read.table('/Users/huiyu818/Project/TCR/DifferentialExpr/bomi2_fullResult.txt', sep = "\t", header = TRUE, row.names = 1)

# Sort the table according to the padj(adjusted p-value) ascending.
# For genes with same padj, order according to the log2FC descending.
# Then , at the end of the coding is essensial
diffGene <- diffGene[order(diffGene$padj, diffGene$log2FoldChange, decreasing = c(FALSE, TRUE)),]

library('biomaRt')

mart <- useDataset("hsapiens_gene_ensembl", useMart("ensembl"))

diff_genes_eid <- rownames(diffGene)

diff_genes_G_list <- getBM(filters="ensembl_gene_id", attributes=c("ensembl_gene_id", "hgnc_symbol", "description"), values=diff_genes_eid, mart=mart)

# diffGene_hgnc <- merge(diffGene, diff_genes_G_list, by.x="row.names", by.y = "ensembl_gene_id")

# The significantly up-regulated genes are labeled with 'UP'
diffGene[which(diffGene$log2FoldChange >= 1 & diffGene$padj < 0.01), 'sign'] <- 'UP'

# The significantly down-regulated genes are labeled with 'DOWN'
diffGene[which(diffGene$log2FoldChange <= -1 & diffGene$padj < 0.01), 'sign'] <- 'DOWN'

# The rest genes are labeled 'none'
diffGene[which(abs(diffGene$log2FoldChange) <= 1 | diffGene$padj >= 0.01), 'sign'] <- 'none'

diffGene["ENSG00000180644", "label"] <- "PRF1"
diffGene["ENSG00000100453", "label"] <- "GZMB"
diffGene["ENSG00000100450", "label"] <- "GZMH"
diffGene["ENSG00000120217", "label"] <- "PD-L1"
diffGene["ENSG00000089692", "label"] <- "LAG3"
diffGene["ENSG00000181847", "label"] <- "TIGIT"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "IFNG","ensembl_gene_id"], "label"] <- "IFNG"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "IL17A","ensembl_gene_id"], "label"] <- "IL17A"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "NKG7","ensembl_gene_id"], "label"] <- "NKG7"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "FASLG","ensembl_gene_id"], "label"] <- "FASLG"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "GZMA","ensembl_gene_id"], "label"] <- "GZMA"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "NCR1","ensembl_gene_id"], "label"] <- "NCR1"
diffGene[diff_genes_G_list[diff_genes_G_list$hgnc_symbol == "GZMM","ensembl_gene_id"], "label"] <- "GZMM"

library(ggrepel)

fig10 <- ggplot(data = diffGene, aes(x = log2FoldChange, y = -log10(padj), color = sign)) + 
  geom_point(size = 1) +  #Create scatter plot
  scale_color_manual(values = c('red', 'gray', 'green'), limits = c('UP', 'none', 'DOWN')) +  #Set the color
  labs(x = 'log2 Fold Change', y = '-log10 adjust p-value', title = 'High clonality vs Low clonality', color = '') +  #Set the labels
  theme(plot.title = element_text(hjust = 0.5, size = 14), panel.grid = element_blank(), #Change the them and grid lines
  panel.background = element_rect(color = 'black', fill = 'transparent'), 
  legend.key = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = 'black') +  #Add threshold line
  geom_hline(yintercept = 2, lty = 3, color = 'black') +
  xlim(-12, 12) + ylim(0, 35) + #Set the curve boundary
  geom_text_repel(
    data = diffGene,
    aes(label = label),
    size = 3, segment.color = "blue", show.legned = FALSE,
    point.padding = unit(0.8, "lines")
  )

fig10
```


#Figure11 Phenotype

```{r}
library(Hmisc)

library(corrplot)

library(readxl)

bomi2_pheno <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities.xlsx", sheet = "phenotyped_densities")

View(bomi2_pheno)

bomi2_pheno <- as.data.frame(bomi2_pheno, row.names = bomi2_pheno$ID)

phenoClin <- merge(GiniClin_tumor, bomi2_pheno, by = "ID", sort = TRUE)

phenoClin <- as.data.frame(phenoClin, row.names = phenoClin$ID)

write.csv(phenoClin, file = "/Users/huiyu818/Project/BOMI2/bomi2_phenotyped_densities_clin.csv")

phenoClin_gini <- merge(GiniClin_tumor[, c("ID", "GiniIndex", "giniGroup")], bomi2_pheno, by="ID", sort = TRUE)

phenoClin_gini <- as.data.frame(phenoClin_gini, row.names = phenoClin_gini$ID)
phenoClin_gini$ID <- NULL

phenoClin_gini2 <- subset(phenoClin_gini, select = -giniGroup)

phenoClin_gini2 <- as.data.frame(phenoClin_gini2)

phenoClin_corr <- rcorr(as.matrix(phenoClin_gini2), type = "spearman")

write.csv(phenoClin_corr$r, file = "~/Project/TCR_Summary/Figure11/phenoCor.csv")
write.csv(phenoClin_corr$P, file = "~/Project/TCR_Summary/Figure11/phenoPvalue.csv")

cor(phenoClin_gini2, use = "pairwise.complete.obs", method = "spearman")
# recorr and cor fits well

phenoCor_melt <- read_excel("~/Project/TCR_Summary/Figure11/giniPheno.xlsx", sheet = "merge")

phenoCor_melt <- as.data.frame(phenoCor_melt)

phenoCor_melt$var1 <- factor(phenoCor_melt$var1, levels = c("CD4 Eff","CD4 Treg","CD8 Eff","CD8 Treg","B-cell","NK-cell","NKT-cell","M1","M2","CD163","iDC","mDC","pDC"))

phenoCor_melt$var2 <- factor(phenoCor_melt$var2, levels = c("SAndT","Tumor", "Stroma"))

fig11 <- ggplot(phenoCor_melt, aes(var1, var2, fill = Corr)) +
  geom_point(shape=21, colour = "black", size = 12)+
  theme_classic()+
  scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Correlation", x = "Cell phenotypes", y = "Position") +
  theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
  geom_text(aes(label = label), size = 5, colour = "black")

fig11
```

## Cluster
### All gini

This method start from the t cell pheno type data to check if the gini index is linked with these group. But there is also a angle that start from Gini index (25%) to analyze the t cell composition changes
```{r}

# ihcHeatT_tumor_norm4 <- ihcHeatT.norm3[rownames(ihcHeatT_tumor_norm2),grep("SAndTM", colnames(ihcHeatT.norm3))]
# 
# ihcHeatT_tumor_norm4 <- merge(AB_Gini_anno, ihcHeatT_tumor_norm4, by="row.names", all=FALSE)
# 
# ihcHeatT_tumor_norm4 <- as.data.frame(ihcHeatT_tumor_norm4, row.names = ihcHeatT_tumor_norm4$Row.names)
# ihcHeatT_tumor_norm4$Row.names <- NULL
# 
# ihcHeatT_tumor_norm4 <- rename(ihcHeatT_tumor_norm4, c("GiniIndex"="Gini index"))
# 
# ihcHeatT_tumor_norm4 <- ihcHeatT_tumor_norm4 %>% rename_with(~ gsub("SQ1", "_",.x))
# 
# ihcHeatT_tumor_norm4 <- ihcHeatT_tumor_norm4[,grep("CD44", colnames(ihcHeatT_tumor_norm4), invert = TRUE)]

phenoClin_gini3 <- phenoClin_gini2[, grep("Pan", colnames(phenoClin_gini2), invert = T)]

phenoClin_gini3.norm <- cbind(phenoClin_gini3[,1],scale(phenoClin_gini3[,c(2:40)], center = T, scale = T))
phenoClin_gini3.norm <- as.data.frame(phenoClin_gini3.norm)
colnames(phenoClin_gini3.norm)[colnames(phenoClin_gini3.norm) == "V1"] = "GiniIndex"
```

```{r}
min.max.norm <- function(x){
  (x - min(x, na.rm = TRUE))/(max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

phenoClin_gini3.norm2 <- apply(phenoClin_gini3[, c(2:40)], 2, min.max.norm)
phenoClin_gini3.norm2 <- as.data.frame(phenoClin_gini3.norm2)
phenoClin_gini3.norm2 <- cbind(phenoClin_gini3[,1], phenoClin_gini3.norm2)
colnames(phenoClin_gini3.norm2)[colnames(phenoClin_gini3.norm2) == "phenoClin_gini3[, 1]"] = "GiniIndex"
```


```{r}
pheatmap(t(phenoClin_gini2[,grep("st_", colnames(phenoClin_gini2), invert = T)]), clustering_method = "ward.D2",
        clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
        cutree_cols = 2,
        #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
        annotation_col = ihcAnno,
        annotation_colors = ihcAnno_color,
        fontsize = 5,
        show_colnames = F)

pheatmap(t(phenoClin_gini3.norm[,c(1,grep("st_", colnames(phenoClin_gini3.norm[,c(2:16)]), invert = T)+1)]), clustering_method = "ward.D2",
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
         cutree_cols = 2,
         #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)
pheatmap(t(phenoClin_gini3.norm2[,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:16)]), invert = T)]), clustering_method = "ward.D2",
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
         cutree_cols = 2,
         #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)

pheatmap(t(phenoClin_gini3.norm2[,grep("st_", colnames(phenoClin_gini3.norm2[,c(2:16)]))]), clustering_method = "ward.D2",
          cluster_rows = F, clustering_distance_cols = "euclidean",
          cutree_cols = 5,
          #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F)
```

```{r}
fig11b <- pheatmap(t(phenoClin_gini3.norm2[,c(1,grep("st_", colnames(phenoClin_gini3.norm2[,c(2:16)]))+1)]), clustering_method = "ward.D2",
          cluster_rows = F, clustering_distance_cols = "euclidean",
          cutree_cols = 3,
          #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
          annotation_col = ihcAnno,
          annotation_colors = ihcAnno_color,
          fontsize = 5,
          show_colnames = F)
fig11b
# pheatmap(t(phenoClin_gini3.norm2[,c(1,grep("st_", colnames(phenoClin_gini3.norm2[,c(2:16)]))+1)]), clustering_method = "ward.D2",
#          cluster_rows = F, clustering_distance_cols = "euclidean",
#          cutree_cols = 6,
#          #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
#          annotation_col = ihcAnno,
#          annotation_colors = ihcAnno_color,
#          fontsize = 10,
#          show_colnames = F)
```

```{r}
pheatmap(t(phenoClin_gini3.norm2[,grep("st_", colnames(phenoClin_gini3.norm2[,c(2:13)]))+1]), clustering_method = "ward.D2",
         cluster_rows = F, clustering_distance_cols = "euclidean",
         cutree_cols = 4,
         #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 10,
         show_colnames = F)
```


```{r}
fig11b.cluster <- fig11b$tree_col

fig11b.cluster.plot <- plot(fig11b.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig11b.cut <- cutree(fig5d_T.cluster, 3)

# fig5d_T.cut_1 <- names(fig5d_T.cut)[fig5d_T.cut == 1]
# fig5d_T.cut_2 <- names(fig5d_T.cut)[fig5d_T.cut == 2]
# 
# fig5d_T.cut_gini <- merge(AB_Gini_anno, fig5d_T.cut, by = "row.names", all.y = TRUE)
# 
# fig5d_T.cut_gini <- as.data.frame(fig5d_T.cut_gini, row.names = fig5d_T.cut_gini$Row.names)
# 
# fig5d_T.cut_gini$Row.names <- NULL
# 
# fig5d_T.cut_gini <- rename(fig5d_T.cut_gini, c("Cluster"="y"))
```

```{r}
fig11c <- pheatmap(t(phenoClin_gini3.norm2[!(rownames(phenoClin_gini3.norm2) == "L464T"),grep("st_", colnames(phenoClin_gini3.norm2[,c(1:14)]))]),
                   clustering_method = "ward.D2",
                   cluster_rows = F, clustering_distance_cols = "euclidean",
                   cutree_cols = 4,
                   #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
                   annotation_col = ihcAnno,
                   annotation_colors = ihcAnno_color,
                   fontsize = 5,
                   show_colnames = F)
```

```{r}
fig11c.cluster <- fig11c$tree_col

fig11c.cluster.plot <- plot(fig11c.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig11c.cut <- cutree(fig11c.cluster, 4)
```

```{r}
fig11c.cut_gini <- merge(phenoClin_gini3.norm2[!(rownames(phenoClin_gini3.norm2) == "L464T"),c(1,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:14)])))],
                         fig11c.cut, by = "row.names", all.y = TRUE)
fig11c.cut_gini <- as.data.frame(fig11c.cut_gini, row.names = fig11c.cut_gini$Row.names)
fig11c.cut_gini$Row.names <- NULL
colnames(fig11c.cut_gini)[colnames(fig11c.cut_gini) == "y"] <- "CutGroup"
fig11c.cut_gini$CutGroup = factor(fig11c.cut_gini$CutGroup)
```

```{r}
ggplot(fig11c.cut_gini, aes(x = CutGroup, y = GiniIndex, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "Gini", x = "Group") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)
```
```{r}
ggplot(fig11c.cut_gini, aes(x = CutGroup, y = s_CD4_Treg, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "CD4_Treg", x = "Group") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)
```
```{r}
ggplot(fig11c.cut_gini, aes(x = CutGroup, y = st_CD4_Eff, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "CD4_Eff", x = "Group") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)
```

```{r}
fig11c.cut_gini.melt <- melt(fig11c.cut_gini, na.rm = F)

ggplot(fig11c.cut_gini.melt, aes(x = variable, y = value, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "T cell phenotype") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))
  #stat_compare_means(paired = NULL)
```
```{r}
ggplot(fig11c.cut_gini.melt, aes(x = variable, y = value, fill=variable)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "T cell phenotype") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  facet_wrap(~CutGroup,ncol = 1)
```

### Smooth line
```{r}
fig11c.cut_gini.melt2 <- melt(cbind("ID"=rownames(fig11c.cut_gini),fig11c.cut_gini[,c(1:5)]), na.rm = F, id.vars = c("ID", "GiniIndex"), variable.factor =F)

fig11c.cut_gini.melt2$variable <- as.factor(fig11c.cut_gini.melt2$variable)

# Smooth value trend line
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value)) +
  geom_smooth(aes(color=variable,fill=variable), method = "loess") +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw stacking value
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area() +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw fraction change
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# smooth density value
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area(fill=NA) +
  stat_smooth(geom = "area", method = "loess", span = 1/3, alpha=0.4)+
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smooth stacking density value
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.5) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smooth fraction changes
ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))+
    scale_x_continuous(expand = c(0,0), limits = c(0,0.8)) + scale_y_continuous(expand = c(0,0), limits = c(0,1))


# Raw fraction change of unnormalized data
phenoClin_gini3.melt <- melt(cbind("ID"=rownames(phenoClin_gini3),phenoClin_gini3[,c(1,grep("st_", colnames(phenoClin_gini3[,c(1:13)])))]),
                             na.rm = F, id.vars = c("ID", "GiniIndex"), variable.factor =F)

ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw stacking of unnormalized data
ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "stack") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smooth fraction change of unnormalized data
ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))+
    scale_y_continuous(expand = c(0,0), limits = c(0,1))

# Smooth stacking unnormalized data
ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

```

```{r}
# Raw stacking plot with sampple density using 
fig11_gini <- ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = 1)) + geom_vline(xintercept=fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$variable == "st_CD4_Eff", "GiniIndex"]) + theme_classic() + theme(panel.background = element_blank())

# normalized raw stacking data
fig11e_normS <- ggplot(fig11c.cut_gini.melt2, aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area() +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# raw stacking data
fig11e_rawS <- ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area() +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# smoothed raw stacking data
fig11e_rawStackSmooth <- ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.4) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.9) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# smoothed raw fraction change data
fig11e_rawSmooth <- ggplot(phenoClin_gini3.melt, aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.4) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.9) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

ggarrange(fig11e_rawS, fig11_gini, ncol = 1, nrow = 2, align = "hv")

ggarrange(fig11e_rawSmooth, fig11_gini, ncol = 1, nrow = 2, align = "hv")

ggarrange(fig11e_rawStackSmooth, fig11e_rawSmooth, fig11_gini, ncol = 1, nrow = 3, align = "hv", common.legend = T)

```


### Ordered by gini
```{r}
pheatmap(t(phenoClin_gini3.norm2[order(phenoClin_gini3.norm2$GiniIndex),c(2:40)]),
         clustering_method = "ward.D2",
         cluster_rows = F, clustering_distance_cols = "euclidean",
         cutree_cols = 4,
         #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)

pheatmap(t(phenoClin_gini3.norm2[order(phenoClin_gini3.norm2$GiniIndex),c(17:31)]),
         clustering_method = "ward.D2",
         cluster_rows = F, cluster_cols = F,
         cutree_cols = 4,
         #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
         annotation_col = ihcAnno,
         annotation_colors = ihcAnno_color,
         fontsize = 5,
         show_colnames = F)
```

### High gini group
```{r}
fig11d <- pheatmap(t(phenoClin_gini3.norm2[phenoClin_gini3.norm2$GiniIndex >= 0.25431,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:14)]))]),
                   clustering_method = "ward.D2",
                   cluster_rows = F, clustering_distance_cols = "euclidean",
                   cutree_cols = 3,
                   #color = colorRampPalette(brewer.pal(n = 9, name = "YlOrBr"))(100),
                   annotation_col = ihcAnno,
                   annotation_colors = ihcAnno_color,
                   fontsize = 5,
                   show_colnames = F)

```

```{r}
fig11d.cluster <- fig11d$tree_col

fig11d.cluster.plot <- plot(fig11d.cluster, hang = -1, cex=0.6, axes=FALSE, ann=FALSE)

fig11d.cut <- cutree(fig11d.cluster, 3)
```
```{r}
fig11d.cut_gini <- merge(phenoClin_gini3.norm2[!(rownames(phenoClin_gini3.norm2) == "L464T"),c(1,grep("st_", colnames(phenoClin_gini3.norm2[,c(1:14)])))],
                         fig11d.cut, by = "row.names", all.y = TRUE)
fig11d.cut_gini <- as.data.frame(fig11d.cut_gini, row.names = fig11d.cut_gini$Row.names)
fig11d.cut_gini$Row.names <- NULL
colnames(fig11d.cut_gini)[colnames(fig11d.cut_gini) == "y"] <- "CutGroup"
fig11d.cut_gini$CutGroup = factor(fig11d.cut_gini$CutGroup)
```

```{r}
fig11d.cut_gini.melt <- melt(fig11d.cut_gini, na.rm = F)

ggplot(fig11d.cut_gini.melt, aes(x = variable, y = value, fill=CutGroup)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "T cell phenotype") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))
  #stat_compare_means(paired = NULL)
```

```{r}
# Raw stacking change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "stack") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed stacking change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw fraction change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed fraction change of unnormalized data (Gini high group)
ggplot(phenoClin_gini3.melt[phenoClin_gini3.melt$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.3) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw stacking value (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
  geom_area() +
  theme_classic() +
  #coord_flip() +
  scale_color_npg(palette = c("nrc"), alpha=1) +
  scale_fill_npg(palette = c("nrc"), alpha=1) +
  labs(x = "Gini coefficient") +
  theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Raw fraction value (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value,fill=variable)) +
    geom_area(position = "fill") +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=0.8) +
    scale_fill_npg(palette = c("nrc"), alpha=0.8) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed stacking change (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "stack",span = 0.5) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))

# Smoothed fraction change (Gini high group)
ggplot(fig11c.cut_gini.melt2[fig11c.cut_gini.melt2$GiniIndex >= 0.25431,], aes(x = GiniIndex, y = value)) +
    stat_smooth(aes(color=variable,fill=variable), method = "loess", geom="area", position = "fill",span = 0.5) +
    theme_classic() +
    #coord_flip() +
    scale_color_npg(palette = c("nrc"), alpha=1) +
    scale_fill_npg(palette = c("nrc"), alpha=0.7) +
    labs(x = "Gini coefficient") +
    theme(legend.position = "top",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20))


```


# Fig12
```{r}
PhenoAll <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "All")
PhenoAllNoCK <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "noCK")
PhenoTIL <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "TIL")
PhenoNK <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "NK")
PhenoAPC <- read_excel("~/Project/BOMI2/bomi2_phenotyped_densities_sep.xlsx", sheet = "APC")

PhenoAll <- as.data.frame(PhenoAll)
PhenoAllNoCK <- as.data.frame(PhenoAllNoCK)
PhenoTIL <- as.data.frame(PhenoTIL)
PhenoNK <- as.data.frame(PhenoNK)
PhenoAPC <- as.data.frame(PhenoAPC)

PhenoAll$Location <- factor(PhenoAll$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoAllNoCK$Location <- factor(PhenoAllNoCK$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoTIL$Location <- factor(PhenoTIL$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoNK$Location <- factor(PhenoNK$Location, levels = c("Tumor", "Stroma", "SAndT"))
PhenoAPC$Location <- factor(PhenoAPC$Location, levels = c("Tumor", "Stroma", "SAndT"))

PhenoAll$PhenoType <- factor(PhenoAll$PhenoType, levels = c("CD4_Eff","CD4_Treg","CD8_Eff","CD8_Treg","B_cells","NK_cells","NKT_cells","M1","M2","CD163","iDC","mDC","pDC","PanCKsingle_TIL", "PanCKsingle_NK","PanCKsingle_APC"))
PhenoAllNoCK$PhenoType <- factor(PhenoAllNoCK$PhenoType, levels = c("CD4_Eff","CD4_Treg","CD8_Eff","CD8_Treg","B_cells","NK_cells","NKT_cells","M1","M2","CD163","iDC","mDC","pDC"))
PhenoTIL$PhenoType <- factor(PhenoTIL$PhenoType, levels = c("CD4_Eff","CD4_Treg","CD8_Eff","CD8_Treg","B_cells","PanCKsingle_TIL"))
PhenoNK$PhenoType <- factor(PhenoNK$PhenoType, levels = c("NK_cells","NKT_cells","M1","M2","CD163","PanCKsingle_NK"))
PhenoAPC$PhenoType <- factor(PhenoAPC$PhenoType, levels = c("iDC","mDC","pDC","PanCKsingle_APC"))

```

```{r}
fig12a <- ggplot(PhenoAllNoCK, aes(x = PhenoType, y = value, fill=giniGroup)) +
    geom_boxplot(outlier.shape = 16, outlier.size = 2, ) +
    theme_classic() +
    # coord_flip() +
    # scale_fill_npg() +
    theme(legend.position = "none") +
    stat_compare_means(paired = NULL, size=2) +
    facet_wrap(~Location, nrow = 3, strip.position = "left", scale="free") +
    scale_y_continuous(expand = c(0,0))


fig12b <- ggplot(PhenoAllNoCK, aes(x = Location, y = value, fill=giniGroup)) +
    geom_boxplot(outlier.shape = 16, outlier.size = 2, ) +
    theme_classic() +
    # coord_flip() +
    # scale_fill_npg() +
    theme(legend.position = "none") +
    stat_compare_means(paired = NULL, size=1.5) +
    facet_wrap(~PhenoType, nrow = 3, strip.position = "left", scale="free")

```



# Fig13 Distance data
```{r}
DistTIL <- read_excel("~/Project/TCR/Distance/BOMI2_dist.xlsx", sheet = "TIL_Gini")
DistTIL <- as.data.frame(DistTIL)
TIL_corrT <- rcorr(as.matrix(DistTIL[,c(5,27:56)]), type = "spearman")
write.csv(TIL_corrT$r, file = "~/Project/TCR/Distance/TIL_corr.csv")
write.csv(TIL_corrT$P, file = "~/Project/TCR/Distance/TIL_corr_Pvalue.csv")

# melt
TIL_corr_melt <- read_excel("~/Project/TCR/Distance/BOMI2_dist.xlsx", sheet = "TIL_Corr")
TIL_corr_melt[,"P_adjust"] <- p.adjust(TIL_corr_melt$`P-Value`,method = "BH")
TIL_corr_melt$label <- NULL
TIL_corr_melt.1d[TIL_corr_melt.1d$var1]
TIL_corr_melt[TIL_corr_melt$P_adjust <= 0.001,"label"] <- "***"
TIL_corr_melt[TIL_corr_melt$P_adjust > 0.001 & TIL_corr_melt$P_adjust <= 0.01,"label"] <- "**"
TIL_corr_melt[TIL_corr_melt$P_adjust > 0.01 & TIL_corr_melt$P_adjust <= 0.05,"label"] <- "*"
TIL_corr_melt$Correlation
```

```{r}
ggplot(TIL_corr_melt, aes(var1, var2, fill = Correlation)) +
    geom_point(shape=21, colour = "black", size = 12)+
    theme_classic()+
    scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
    labs(title = "Correlation", x = "Outset", y = "End") +
    theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
    geom_text(aes(label = label), size = 5, colour = "black")
```

## 1D plot
```{r}
TIL_corr_melt.1d <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "TIL_Corr_1D")
TIL_corr_melt.1d <- as.data.frame(TIL_corr_melt.1d)
# -0.122419365	0.138273993
#TIL_corr_melt.1d[TIL_corr_melt.1d$var1 == "CD4_Treg" & TIL_corr_melt.1d$var2 == "PanCKsingle", c("Correlation","P-Value")] <- c(-0.122419365, 0.138273993)
TIL_corr_melt.1d[,"P_adjust"] <- p.adjust(TIL_corr_melt.1d$`P-Value`,method = "BH")
TIL_corr_melt.1d[TIL_corr_melt.1d$P_adjust <= 0.001,"label"] <- "***"
TIL_corr_melt.1d[TIL_corr_melt.1d$P_adjust > 0.001 & TIL_corr_melt.1d$P_adjust <= 0.01,"label"] <- "**"
TIL_corr_melt.1d[TIL_corr_melt.1d$P_adjust > 0.01 & TIL_corr_melt.1d$P_adjust <= 0.05,"label"] <- "*"
```

```{r}
ggplot(TIL_corr_melt.1d, aes(var2, var1, fill = Correlation)) +
    geom_point(shape=21, colour = "black", size = 12)+
    theme_classic()+
    scale_fill_gradientn(colours = rev(brewer.pal(5, "Spectral")))+
    labs(title = "Correlation", x = "Outset", y = "End") +
    theme(axis.text = element_text(size = 10, colour = "black"), axis.title = element_text(size = 10),plot.title = element_text(size=15))+
    geom_text(aes(label = label), size = 5, colour = "black")
```




# Fig14 immunetherapy treated cohort
```{r}
library(readxl)
itCohort <- read_excel("~/Project/TCR/ImmuneCohort/GSE126044_gini_ab.xlsx", 
    sheet = "Sheet3")
View(itCohort)

itCohort <- as.data.frame(itCohort)
```

```{r}
f14a <- ggplot(itCohort, aes(x = factor(Responsiveness, level=c('Responder','Non-responder' )), y = Gini, fill=Responsiveness)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  scale_fill_manual(values = c("#00BFC4", "#F8766D")) +
  labs(title = "Responsiveness", x = "Responsiveness", y = "Gini coefficient") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)

f14a
```

```{r}
colnames(itCohort)[colnames(itCohort) == "Progression free survival \r\n(month)"] = "Progression free survival (month)"
# Use the cut off from our dataset
itCohort[itCohort$Gini >= 0.19, "giniGroup"] <- "High"
itCohort[itCohort$Gini < 0.19, "giniGroup"] <- "Low"
itCohort$giniGroup = factor(itCohort$giniGroup, levels = c("High", "Low"))

# construct the object
fig14b_test <- survfit(Surv(`Progression free survival (month)`, Status) ~ giniGroup, data = itCohort)

print(fig14b_test)

summary(fig14b_test)$table
```

```{r}
fig14b <- ggsurvplot(fig14b_test,
                        pval = TRUE,
                        palette = c("#e64b35", "#4dbbd5"),
                        break.time.by = 1,
                        ggtheme = theme_classic(),
                        legend.labs = c("High", "Low"),
                        risk.table = "abs_pct",
                        risk.table.y.text = FALSE,
                        xlab = "Time (months)",
                        title = "giniSurv",
                        xlim = c(0,14.5),
                        axes.offset = FALSE,
                        risk.table.fontsize = 2.5,
                        legend = c(0.95,0.97),
                        risk.table.title = "Number at risk by time: n (%)",
                        font.main = c(),
                     break.x.by=3
                     )


fig14b
```

```{r}
surv_median(fig14b_test)
```


# Figure 15 Lymph nodes
```{r}
library(stringr)
GiniClin_tumor$lymph <- str_sub(GiniClin_tumor$`pTNM edt 8 (PMI)`, -2,-1)

GiniClin_tumor[GiniClin_tumor$lymph == "N0", "Lymph_node"] = "N0"
GiniClin_tumor[GiniClin_tumor$lymph == "N1"| GiniClin_tumor$lymph == "N2", "Lymph_node"] = "N1/N2"

ggplot(GiniClin_tumor, aes(x = Lymph_node, y = GiniIndex, fill=Lymph_node)) +
  geom_boxplot(outlier.colour = "black", outlier.shape = 16, outlier.size = 2, colour = "black", notch = FALSE) +
  theme_classic() +
  #coord_flip() +
  #scale_color_npg() +
  labs(title = "lymph", x = "lymph") +
  theme(legend.position = "none",axis.text = element_text(size = 15, colour = "black"), axis.title = element_text(size = 15),plot.title = element_text(size=20)) +
  stat_compare_means(paired = NULL)
```

# Beta clone distance
```{r}
GiniDist <- read.table("/Users/huiyu818/Project/TCR/TCRdist//cloneSummary/beta_tumor_dist_rank.txt")
GiniDist <- as.data.frame(GiniDist)
```

```{r}
pheatmap(GiniDist,
         cluster_rows = F, cluster_cols = F,
         #cutree_cols = 3,
         color = colorRampPalette(brewer.pal(n = 7, name = "RdYlBu"))(100),
         fontsize = 5,
         show_colnames = T
)
```

```{r}
# Load the annotation
GiniDist_anno <-  read_excel("~/Project/TCR/TCRdist/cloneSummary/beta_tumor.xlsx", sheet = "Distance_DF_2")
GiniDist_anno <- as.data.frame(GiniDist_anno)
rownames(GiniDist_anno) <- GiniDist_anno$clone_id_new
```

```{r}
GiniDist_anno_color = list(
#  "GiniIndex" = colorRampPalette(brewer.pal(n = 9, name = "Reds"))(100),
  "Anti_virus" = c(Y = "#D45F51", N = "#B7D2E8"))
```

```{r}
pheatmap(GiniDist,
         cluster_rows = F, cluster_cols = F,
         color = colorRampPalette(brewer.pal(n = 7, name = "RdYlBu"))(100),
         annotation_col = GiniDist_anno["Anti_virus"],
         annotation_colors = GiniDist_anno_color,
         fontsize = 5,
         show_colnames = T
         )
```

```{r}
pheatmap(GiniDist,
         clustering_method = "ward.D2",
         clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
         color = colorRampPalette(brewer.pal(n = 7, name = "RdYlBu"))(100),
         annotation_col = GiniDist_anno["Anti_virus"],
         annotation_colors = GiniDist_anno_color,
         fontsize = 5,
         show_colnames = T
         )
```


```{r}
pheatmap(t(ihcHeatT_tumor_norm2), clustering_method = "ward.D2",
                  clustering_distance_rows = "euclidean", clustering_distance_cols = "euclidean",
                  cutree_cols = 3,
                  #color = colorRampPalette(brewer.pal(n = 7, name = "YlGnBu"))(100),
                  annotation_col = ihcAnno,
                  annotation_colors = ihcAnno_color,
                  fontsize = 5,
                  show_colnames = F
                  )
```


# Cancer testis antigens
```{r}
CTA_list <- read_excel("~/Project/TCR_Summary/Writing/SumSup.xlsx", sheet = "CTA")

CTA_list <- as.data.frame(CTA_list)
```

```{r}
Bomi2RC_tumor_norm <- read.csv("~/Project/Clinical/BOMI2/BOMI2_RNAseq_FPKM/RNASeq_counts_tumor_norm.txt", sep="\t")
```

## CTA list 1 (all from database)
```{r}
library(data.table)
GiniClin_CTA_ori <- as.data.frame(t(Bomi2RC_tumor_norm[CTA_list$Ensgid_1,]))

GiniClin_CTA_ori <- merge(GiniClin_tumor, GiniClin_CTA_ori, by.x = "ID" ,by.y = "row.names")
setnames(GiniClin_CTA_ori, old =  CTA_list[["Ensgid_1"]], new = CTA_list[["Gene_1"]])
```

```{r}
GiniClin_CTA_ori[1,37]
```

```{r}
CTA_corr <- rcorr(as.matrix(GiniClin_CTA_ori[,c(5,37:132)]), type = "spearman")

write.csv(CTA_corr$r, file = "~/Project/TCR/CTA/ctaCor.csv")
write.csv(CTA_corr$P, file = "~/Project/TCR/CTA/ctaPvalue.csv")
```

```{r}
CTA_corrT <- read_excel("~/Project/TCR/CTA/CTA.xlsx", sheet = "Sheet3")

CTA_corrT <- as.data.frame(CTA_corrT)

CTA_corrT <- rename(CTA_corrT, c("GeneSymbol"="...1"))
CTA_corrT[,"padj"] <- p.adjust(CTA_corrT$`P-value`,method = "BH")

write.csv(CTA_corrT, file = "~/Project/TCR/CTA/ctaPadj.csv")
```

```{r}
CTA_corrT[CTA_corrT$padj < 0.05,]
```

## CTA list 2 (noval list)
```{r}
GiniClin_CTA_ori_2 <- as.data.frame(t(Bomi2RC_tumor_norm[CTA_list[!is.na(CTA_list$Ensgid_2),"Ensgid_2"],]))

GiniClin_CTA_ori_2 <- merge(GiniClin_tumor, GiniClin_CTA_ori_2, by.x = "ID" ,by.y = "row.names")
setnames(GiniClin_CTA_ori_2, old =  CTA_list[!is.na(CTA_list$Ensgid_2),"Ensgid_2"], new = CTA_list[!is.na(CTA_list$Ensgid_2),"Gene_2"])
```

```{r}
GiniClin_CTA_ori_2[3,37]
```

```{r}
CTA_corr_2 <- rcorr(as.matrix(GiniClin_CTA_ori_2[,c(5,37:126)]), type = "spearman")

write.csv(CTA_corr_2$r, file = "~/Project/TCR/CTA/ctaCor_list2.csv")
write.csv(CTA_corr_2$P, file = "~/Project/TCR/CTA/ctaPvalue_list2.csv")
```

```{r}
CTA_corrT_2 <- read_excel("~/Project/TCR/CTA/CTA.xlsx", sheet = "GeneList2")
CTA_corrT_2 <- as.data.frame(CTA_corrT_2)

CTA_corrT_2[,"padj"] <- p.adjust(CTA_corrT_2$Pvalue,method = "BH")
```


## Vocanol plot
```{r}
ggplot(data = diffGene, aes(x = log2FoldChange, y = -log10(padj), color = sign)) + 
  geom_point(size = 1) +  #Create scatter plot
  scale_color_manual(values = c('red', 'gray', 'green'), limits = c('UP', 'none', 'DOWN')) +  #Set the color
  labs(x = 'log2 Fold Change', y = '-log10 adjust p-value', title = 'High clonality vs Low clonality', color = '') +  #Set the labels
  theme(plot.title = element_text(hjust = 0.5, size = 14), panel.grid = element_blank(), #Change the them and grid lines
  panel.background = element_rect(color = 'black', fill = 'transparent'), 
  legend.key = element_rect(fill = 'transparent')) +
  geom_vline(xintercept = c(-1, 1), lty = 3, color = 'black') +  #Add threshold line
  geom_hline(yintercept = 2, lty = 3, color = 'black') +
  xlim(-12, 12) + ylim(0, 35) + #Set the curve boundary
  geom_text_repel(
    data = diffGene,
    aes(label = label),
    size = 3, segment.color = "blue", show.legned = FALSE,
    point.padding = unit(0.8, "lines")
  )
```


# ISS plot
```{r}
ISS_dot_data <- read_excel("~/Project/TCR/ISSimmunopanels/Analysis/DotPlot sample.xlsx")
View(ISS_dot_data)

ISS_dot_data <- as.data.frame(ISS_dot_data)
```

```{r}
ggplot(ISS_dot_data, aes(Clone, EPCAM_CDH1_anno)) +
  geom_point(aes(size=Clone_fraction, color = Clone_mean))+
  facet_wrap(~ Sample, scales = "free_x", drop = TRUE) +
  theme_bw()+
  theme(panel.grid.major = element_blank(),  # Remove major grid lines
        panel.grid.minor = element_blank()) + # Remove minor grid lines
  scale_size(range = c(1, 10)) +
  scale_color_gradientn(colors = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Bubble plot grouped by sample", x = "Clone", y = "EPCAM_CDH1 annotation") +
  theme(axis.text.x = element_text(size = 10, colour = "black", angle = 45), axis.title = element_text(size = 10),plot.title = element_text(size=15))
  #geom_text(aes(label = label), size = 5, colour = "black")
```

```{r}
library(dplyr)

ISS_dot_data <- ISS_dot_data %>%
  group_by(Sample) %>%
  mutate(Normalized_Clone_fraction = scale(Clone_fraction),  # Normalizing clone fraction
         Normalized_Clone_mean = scale(Clone_mean)) %>%  # Normalizing clone mean
  ungroup()
```


```{r}
ISS_dot_data_L766 <- ISS_dot_data[ISS_dot_data$Sample == "L766T",]
```

```{r}
ggplot(ISS_dot_data_L766, aes(Clone, EPCAM_CDH1_anno)) +
  geom_point(aes(size=Clone_fraction, color = Clone_mean))+
  theme_bw()+
  theme(panel.grid.major = element_blank(),  # Remove major grid lines
        panel.grid.minor = element_blank()) + # Remove minor grid lines
  scale_color_gradientn(colors = rev(brewer.pal(5, "Spectral")))+
  labs(title = "Bubble plot grouped by sample", x = "Clone", y = "EPCAM_CDH1 annotation") +
  theme(axis.text.x = element_text(size = 10, colour = "black", angle = 45), axis.title = element_text(size = 10),plot.title = element_text(size=15))
```

```{r}
ISS_dot_data_L596 <- ISS_dot_data[ISS_dot_data$Sample == "L596T",]
```

```{r}
ggplot(ISS_dot_data_L596, aes(Clone, EPCAM_CDH1_anno)) +
  geom_point(aes(size=Clone_fraction, color = Clone_mean))+
  theme_bw()+
  theme(panel.grid.major = element_blank(),  # Remove major grid lines
        panel.grid.minor = element_blank()) + # Remove minor grid lines
  scale_color_gradientn(colors = rev(brewer.pal(5, "Spectral")))+
  scale_size(range = c(1, 10)) +
  labs(title = "Bubble plot grouped by sample", x = "Clone", y = "EPCAM_CDH1 annotation") +
  theme(axis.text.x = element_text(size = 10, colour = "black", angle = 45), axis.title = element_text(size = 10),plot.title = element_text(size=15))
```










